Pak choi

Diversity analyses, and summary plots

#corrected dataset with raw counts
data<-read_excel('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/Copy of Pak choi pollinators raw visits Brad correctedupdatedEddy.xlsx',sheet='Pak choi raw visits')

#filter out things that are 2018 in Boundary age column
data<-data %>% filter(`Boundary age`!=2018)
#there is two add balls that only are recorded in one season can drop them here with this: 
data<-data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='one year old'))
data<-data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='two year old'))


brads_col_2<-read_excel('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/Eddy Pie chart short.xlsx',sheet='Colours')
#tidy up some names etc
data$Boundary_descriptor<-gsub('one year old','One year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('two year old','Two year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('three year old','Three year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('Bare_fence','On-farm bare fence',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('Control_farm','Off-farm bare fence',data$Boundary_descriptor)

#summarising counts to clean up dataset etc for box plots
data_box<-data %>% select(-`Broad Insect category`,-Origin,-`Boundary age`,-`Sum of Insect_Count`,-`Flower count`) %>% group_by(`Farm type`,Farm_name,Season,Boundary_descriptor,`Insect key number`,`New Inseck key name`) %>% summarise(`Sum of Insects/100 flowers`=sum(`Sum of Insects/100 flowers`) )
## `summarise()` has grouped output by 'Farm type', 'Farm_name', 'Season',
## 'Boundary_descriptor', 'Insect key number'. You can override using the
## `.groups` argument.
#counts per for below graph of abundance estimate per site
data_per<-data_box%>% mutate(Boundary_year=paste0(Boundary_descriptor,'_',Season)) %>% select(-`Insect key number`,-Boundary_descriptor,-Season) %>% group_by(`Farm type`,Farm_name,Boundary_year,`New Inseck key name`) %>% summarise(total_count=sum(`Sum of Insects/100 flowers`)) %>% ungroup() %>% group_by(`Farm type`,Farm_name,Boundary_year) %>% mutate(per=100*total_count/sum(total_count)) %>% ungroup()
## Adding missing grouping variables: `Season`, `Boundary_descriptor`, `Insect key
## number`
## `summarise()` has grouped output by 'Farm type', 'Farm_name', 'Boundary_year'.
## You can override using the `.groups` argument.
data_per %>%   ggplot(aes(x = Boundary_year, y = per, fill = `New Inseck key name`)) +
  geom_bar(position='stack',stat='identity',width=0.4) +
  facet_grid(`Farm type`~Farm_name, scales = "free", space = "free") +
  theme_bw(base_size = 12) +
  theme(strip.text.x = element_text(angle = 90,size=7),axis.text.x = element_text(angle = 90,size=7,hjust=1))+
  ggtitle(paste('test'))+
 theme(legend.position="none")

mydf<- data_box %>% mutate(Boundary_year=paste0(Boundary_descriptor,'_',Season))

#setting up for brads colour scheme
#relevel to brads order
brads_col_2$`New Inseck key name` <- factor(brads_col_2$`New Inseck key name`, levels=brads_col_2$`New Inseck key name`[order(brads_col_2$`Insect key number`)], ordered=TRUE)

#relevel to brads order
mydf$`New Inseck key name` <- forcats::fct_relevel(mydf$`New Inseck key name` ,levels(brads_col_2$`New Inseck key name`))

Diversity statistics

Bar charts to view diversity over different groups

##bar chart raw abundances .....
#cant just be raw count
#needs to be average by sampling number


mydf_byboundary_farm_count<-mydf %>% select(-`Insect key number`,-`Farm type`,-Boundary_year) %>% mutate(unique_ob=paste(Boundary_descriptor,Farm_name,Season)) %>% select(-Farm_name,-Season) %>% group_by(Boundary_descriptor,`New Inseck key name`) %>% summarise(count_farms_perobs=sum(`Sum of Insects/100 flowers`)/n_distinct(unique_ob)) 
## Adding missing grouping variables: `Farm type`, `Insect key number`
## Adding missing grouping variables: `Farm_name`, `Season`
## `summarise()` has grouped output by 'Boundary_descriptor'. You can override
## using the `.groups` argument.
mydf_byboundary_farm_count$Boundary_descriptor <- factor(mydf_byboundary_farm_count$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

mydf_byboundary_farm_count %>%   ggplot(aes(x = Boundary_descriptor, y = count_farms_perobs, fill = `New Inseck key name`)) +
  geom_bar(position='stack',stat='identity',width=0.4) +
  # facet_grid(`Farm type`~Farm_name, scales = "free", space = "free") +
  theme_bw(base_size = 12) +
  theme(strip.text.x = element_text(angle = 90,size=7),axis.text.x = element_text(angle = 90,size=7,hjust=1))+
  scale_fill_manual(values=with(brads_col_2,setNames(Brad_colours,`New Inseck key name`)))+
  ggtitle(paste('Counts by boundary'))+ylab('Average count per 100 flowers')+xlab('Boundary type')+
  labs(fill = "Insect  group")+
  theme(
    legend.text = element_text(size = 6), 
    legend.title = element_text(size = 6),
    legend.key.size = unit(1,"line"))  

#bar chart relative abundances % bare fence, control year 1, year 2 year3 and old
mydf_byboundary<-mydf%>% select(-`Insect key number`,-Season,-`Farm type`,-Farm_name,-Boundary_year) %>% group_by(Boundary_descriptor,`New Inseck key name`) %>% summarise(total_count=sum(`Sum of Insects/100 flowers`)) %>% ungroup() %>% group_by(Boundary_descriptor) %>% mutate(per=100*total_count/sum(total_count)) %>% ungroup()
## Adding missing grouping variables: `Farm type`, `Farm_name`, `Season`, `Insect
## key number`
## `summarise()` has grouped output by 'Boundary_descriptor'. You can override
## using the `.groups` argument.
#recolour and tidy
mydf_byboundary$Boundary_descriptor <- factor(mydf_byboundary$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

mydf_byboundary %>%   ggplot(aes(x = Boundary_descriptor, y = per, fill = `New Inseck key name`)) +
  geom_bar(position='stack',stat='identity',width=0.4) +
  # facet_grid(`Farm type`~Farm_name, scales = "free", space = "free") +
  theme_bw(base_size = 12) +
  theme(strip.text.x = element_text(angle = 90,size=7),axis.text.x = element_text(angle = 90,size=7,hjust=1),plot.title = element_text(size = 13))+
  scale_fill_manual(values=with(brads_col_2,setNames(Brad_colours,`New Inseck key name`)))+
  ggtitle(paste('Relative abundance by boundary'))+ylab('Proportion')+xlab('Boundary type')+
  labs(fill = "Insect  group")+
  theme(
    legend.text = element_text(size = 6), 
    legend.title = element_text(size = 6),
    legend.key.size = unit(1,"line"))  

Bar charts split by season

Alpha diversity (below) suggests there might be a role of season, so just splitting that out here. Season makes sense as the second year had a lot less counts than the first.

#Adding in season
##bar chart raw abundances .....
#cant just be raw count
#needs to be average by sampling number

mydf_byboundary_farm_count_yr<-mydf %>% select(-`Insect key number`,-`Farm type`,-Boundary_descriptor,-Season) %>% mutate(unique_ob=paste(Boundary_year,Farm_name)) %>% select(-Farm_name) %>% group_by(Boundary_year,`New Inseck key name`) %>% summarise(count_farms_perobs=sum(`Sum of Insects/100 flowers`)/n_distinct(unique_ob)) 
## Adding missing grouping variables: `Farm type`, `Season`,
## `Boundary_descriptor`, `Insect key number`
## Adding missing grouping variables: `Farm_name`
## `summarise()` has grouped output by 'Boundary_year'. You can override using the
## `.groups` argument.
mydf_byboundary_farm_count_yr<-mydf_byboundary_farm_count_yr %>% separate_wider_delim(Boundary_year,'_',names=c("Boundary_descriptor","Season"))

mydf_byboundary_farm_count_yr$Boundary_descriptor <- factor(mydf_byboundary_farm_count_yr$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

mydf_byboundary_farm_count_yr %>%   ggplot(aes(x = Boundary_descriptor, y = count_farms_perobs, fill = `New Inseck key name`)) +
  geom_bar(position='stack',stat='identity',width=0.4) +
  facet_grid(~Season, scales = "free") +
  theme_bw(base_size = 12) +
  theme(strip.text.x = element_text(angle = 0,size=7),axis.text.x = element_text(angle = 90,size=7,hjust=1),plot.title = element_text(size = 13))+
  scale_fill_manual(values=with(brads_col_2,setNames(Brad_colours,`New Inseck key name`)))+
  ggtitle(paste('Counts by boundary'))+ylab('Average count per 100 flowers')+xlab('Boundary type')+
  labs(fill = "Insect  group")+
  theme(
    legend.text = element_text(size = 6), 
    legend.title = element_text(size = 6),
    legend.key.size = unit(1,"line"))  

#adding in season 
#bar chart relative abundances % bare fence, control year 1, year 2 year3 and old

mydf_byboundary_yr<-mydf%>% select(-`Insect key number`,-Season,-`Farm type`,-Farm_name,-Boundary_descriptor) %>% group_by(Boundary_year,`New Inseck key name`) %>% summarise(total_count=sum(`Sum of Insects/100 flowers`)) %>% ungroup() %>% group_by(Boundary_year) %>% mutate(per=100*total_count/sum(total_count)) %>% ungroup()
## Adding missing grouping variables: `Farm type`, `Farm_name`, `Season`,
## `Boundary_descriptor`, `Insect key number`
## `summarise()` has grouped output by 'Boundary_year'. You can override using the
## `.groups` argument.
mydf_byboundary_yr<-mydf_byboundary_yr %>% separate_wider_delim(Boundary_year,'_',names=c("Boundary_descriptor","Season"))

#recolour and tidy
mydf_byboundary_yr$Boundary_descriptor <- factor(mydf_byboundary_yr$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

mydf_byboundary_yr %>%   ggplot(aes(x = Boundary_descriptor, y = per, fill = `New Inseck key name`)) +
  geom_bar(position='stack',stat='identity',width=0.4) +
  facet_wrap(~Season,scales="free") +
  theme_bw(base_size = 12) +
  theme(strip.text.x = element_text(angle = 0,size=7),axis.text.x = element_text(angle = 90,size=7,hjust=1),plot.title = element_text(size = 13))+
  scale_fill_manual(values=with(brads_col_2,setNames(Brad_colours,`New Inseck key name`)))+
  ggtitle(paste('Relative abundance by boundary'))+ylab('Proportion')+xlab('Boundary type')+
  labs(fill = "Insect  group")+
  theme(
    legend.text = element_text(size = 6), 
    legend.title = element_text(size = 6),
    legend.key.size = unit(1,"line"))  

Just a base tests to show that there is significantly more counts by planting age

Just mucking around trying to think of ways to highlight the great raw counts in old, 3 years etc

#switching to raw counts
data<-read_excel('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/Copy of Pak choi pollinators raw visits Brad correctedupdatedEddy.xlsx',sheet='Pak choi raw visits')

#filter out things that are 2018 in Boundary age column
data<-data %>% filter(`Boundary age`!=2018)
#two odd balls that are only boundary for this season
data<-data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='one year old'))
data<-data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='two year old'))

brads_col_2<-read_excel('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/Eddy Pie chart short.xlsx',sheet='Colours')
#tidy up some names etc
data$Boundary_descriptor<-gsub('one year old','One year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('two year old','Two year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('three year old','Three year old',data$Boundary_descriptor)
data$Boundary_descriptor<-gsub('Bare_fence','On-farm bare fence',data$Boundary_descriptor) #was Bare fence
data$Boundary_descriptor<-gsub('Control_farm','Off-farm bare fence',data$Boundary_descriptor) #was Control farm

unique(data$Boundary_descriptor)
## [1] "Off-farm bare fence" "One year old"        "Two year old"       
## [4] "Three year old"      "Old"                 "On-farm bare fence"
colnames(data)
##  [1] "Farm type"                  "Farm_name"                 
##  [3] "Season"                     "Boundary_descriptor"       
##  [5] "Boundary age"               "Insect key number"         
##  [7] "New Inseck key name"        "Sum of Insects/100 flowers"
##  [9] "Broad Insect category"      "Origin"                    
## [11] "Sum of Insect_Count"        "Flower count"
#summarising counts to clean up dataset etc
mydf<-data %>% select(-`Broad Insect category`,-Origin,-`Boundary age`,-`Sum of Insects/100 flowers`) 

mydf<-mydf %>% mutate(unique_sample=paste(Boundary_descriptor,Farm_name,Season))

counts<-mydf %>% ungroup() %>% select(unique_sample,`New Inseck key name`,`Sum of Insect_Count`)

#transform wide
counts_wide<-counts %>% pivot_wider(names_from=`New Inseck key name`,values_from = `Sum of Insect_Count`)
counts_wide<-counts_wide %>% remove_rownames %>% column_to_rownames(var="unique_sample")

#meta data
meta_data<-read_excel('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/Copy of Meta_data_pakchoiBH.xlsx',sheet='Meta_data_pakchoi')
rownames(meta_data) <- meta_data$unique_sample
meta_data<-meta_data %>% mutate(month_split=paste(Collection_month,Beg_Mid_end,sep='_'))
meta_data<-meta_data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='One year old'))
meta_data<-meta_data %>% filter(!(Season=='2023/24'& Boundary_descriptor=='Two year old'))
#relevel
meta_data<-meta_data %>%   mutate(Boundary_descriptor=fct_relevel(Boundary_descriptor,c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old')))

#check meta and count data match
setdiff(row.names(counts_wide),meta_data$unique_sample)
## character(0)
#brad colour scheme (control to old):
col_bound<-c('chocolate','#FDE725FF',"#5DC863FF","#21908CFF",'#3B528BFF',"#440154FF")
#col_bound

#ready to analyse!

insect_counts<-rowSums(counts_wide)

insect_counts_df<-data.frame(insect_counts)
insect_counts_df <- rownames_to_column(insect_counts_df, var = "unique_sample")
insect_counts_df<-left_join(insect_counts_df,meta_data)
## Joining with `by = join_by(unique_sample)`
insect_counts_df$Boundary_descriptor <- factor(insect_counts_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)
#split by season
plot_sppr <- ggplot(insect_counts_df, aes(x = Boundary_descriptor, y = insect_counts, fill = Boundary_descriptor)) +
  geom_boxplot() +
#  facet_wrap(~Season)+
  theme_bw()+
  scale_fill_manual(values=col_bound)+
  labs(x = "Boundary type",
       y = "Raw counts",
       title = "Raw counts by boundary",
       fill = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#split by season
plot_sppr <- ggplot(insect_counts_df, aes(x = Boundary_descriptor, y = insect_counts, fill = Boundary_descriptor)) +
  geom_boxplot() +
  facet_wrap(~Season,scales="free")+
  theme_bw()+
  scale_fill_manual(values=col_bound)+
  labs(x = "Boundary type",
       y = "Raw counts",
       title = "Raw counts by boundary",
       fill = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#issue with model convergence with some random effects so:
m0<-lmer(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) , data = meta_data)
m1<-lmer(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) +(1|Site), data = meta_data)
m3<-lmer(insect_counts ~ Boundary_descriptor + `Flower count` +(1|Season) , data = meta_data)
m4<-lmer(insect_counts ~ Boundary_descriptor + `Flower count` +(1|Season) +(1|Site), data = meta_data)

#im getting a boundary (singular) fit warning with month_split so dropping for this

AIC(m0,m1,m3,m4)
##    df      AIC
## m0  8 1129.616
## m1  9 1131.376
## m3  9 1120.980
## m4 10 1121.583
anova(m0,m1,m3,m4)
## refitting model(s) with ML (instead of REML)
## Data: meta_data
## Models:
## m0: insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) + (1 | Season)
## m1: insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) + (1 | Season) + (1 | Site)
## m3: insect_counts ~ Boundary_descriptor + `Flower count` + (1 | Season)
## m4: insect_counts ~ Boundary_descriptor + `Flower count` + (1 | Season) + (1 | Site)
##    npar  AIC    BIC  logLik -2*log(L)   Chisq Df Pr(>Chisq)
## m0    8 1170 1191.9 -577.02      1154                      
## m1    9 1172 1196.5 -576.99      1154  0.0540  1     0.8162
## m3    9 1157 1181.5 -569.49      1139 15.0041  0           
## m4   10 1158 1185.3 -568.99      1138  0.9996  1     0.3174
#m0<-lmer(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) +(1|Site), data = meta_data)
m0<-lmer(insect_counts ~ Boundary_descriptor + `Flower count` +(1|Season) +(1|Site), data = meta_data)

qqnorm(residuals(m0))
qqline(residuals(m0))

#probably better of in glmer (right hand skew)

m0<-glmer(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) , data = meta_data, family = poisson)
m1<-glmer(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) +(1|Site) , data = meta_data, family = poisson)
m2<-glmer(insect_counts ~ Boundary_descriptor + log(`Flower count`) +(1|Season)  , data = meta_data, family = poisson)
m3<-glmer(insect_counts ~ Boundary_descriptor + log(`Flower count`) +(1|Season) +(1|Site) , data = meta_data, family = poisson)
m4<-glmer.nb(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) +(1|Site) , data = meta_data)
m5<-glmer.nb(insect_counts ~ Boundary_descriptor + offset(log(`Flower count`)) +(1|Season) , data = meta_data)
m6<-glmer.nb(insect_counts ~ Boundary_descriptor + log(`Flower count`) +(1|Season) , data = meta_data)
m7<-glmer.nb(insect_counts ~ Boundary_descriptor + log(`Flower count`) +(1|Season) +(1|Site), data = meta_data)
AIC(m0,m1,m2,m3,m4,m5,m6,m7)
##    df      AIC
## m0  7 2682.530
## m1  8 1782.832
## m2  8 2649.526
## m3  9 1780.580
## m4  9 1091.027
## m5  8 1092.323
## m6  9 1093.274
## m7 10 1092.317
#actually over dispersed anyways so using nb
m0<-glmer.nb(insect_counts ~ Boundary_descriptor + log(`Flower count`) +(1|Season) +(1|Site), data = meta_data)
res<-simulateResiduals(m0)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.66453, p-value = 0.744
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
performance::check_overdispersion(m0)
## # Overdispersion test
## 
##  dispersion ratio = 0.605
##           p-value = 0.544
## No overdispersion detected.
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(4.0803)  ( log )
## Formula: insect_counts ~ Boundary_descriptor + log(`Flower count`) + (1 |  
##     Season) + (1 | Site)
##    Data: meta_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1092.3    1119.6    -536.2    1072.3       103 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6404 -0.5704 -0.0903  0.3601  2.5179 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Site   (Intercept) 0.06766  0.2601  
##  Season (Intercept) 0.11075  0.3328  
## Number of obs: 113, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -1.163108   0.955819  -1.217   0.2237    
## Boundary_descriptorOn-farm bare fence -0.008809   0.182409  -0.048   0.9615    
## Boundary_descriptorOne year old        0.667203   0.277327   2.406   0.0161 *  
## Boundary_descriptorTwo year old        0.694029   0.276708   2.508   0.0121 *  
## Boundary_descriptorThree year old      1.095612   0.265137   4.132 3.59e-05 ***
## Boundary_descriptorOld                 0.894571   0.169159   5.288 1.23e-07 ***
## log(`Flower count`)                    0.855790   0.170395   5.022 5.10e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.048                                                  
## Bndry_dsOyo       -0.083  0.331                                           
## Bndry_dscrptrTwyo  0.132  0.341  0.279                                    
## Bndry_dscrptrThyo -0.188  0.305  0.311  0.272                             
## Bndry_dscrO       -0.127  0.521  0.327  0.358             0.373           
## lg(`Flcnt`)       -0.970 -0.047  0.023 -0.201             0.128           
##                   Bndr_O
## Bndry_dO-bf             
## Bndry_dsOyo             
## Bndry_dscrptrTwyo       
## Bndry_dscrptrThyo       
## Bndry_dscrO             
## lg(`Flcnt`)        0.026
#kable(summary(m0), format="html")
emm2 <- emmeans(m0, ~ Boundary_descriptor,type = "response" )

pairs(emm2,  adjust = "tukey")
##  contrast                                     ratio     SE  df null z.ratio
##  (Off-farm bare fence) / (On-farm bare fence) 1.009 0.1840 Inf    1   0.048
##  (Off-farm bare fence) / One year old         0.513 0.1420 Inf    1  -2.406
##  (Off-farm bare fence) / Two year old         0.500 0.1380 Inf    1  -2.508
##  (Off-farm bare fence) / Three year old       0.334 0.0886 Inf    1  -4.132
##  (Off-farm bare fence) / Old                  0.409 0.0691 Inf    1  -5.288
##  (On-farm bare fence) / One year old          0.509 0.1410 Inf    1  -2.441
##  (On-farm bare fence) / Two year old          0.495 0.1360 Inf    1  -2.560
##  (On-farm bare fence) / Three year old        0.331 0.0902 Inf    1  -4.057
##  (On-farm bare fence) / Old                   0.405 0.0699 Inf    1  -5.238
##  One year old / Two year old                  0.974 0.3240 Inf    1  -0.081
##  One year old / Three year old                0.652 0.2080 Inf    1  -1.345
##  One year old / Old                           0.797 0.2180 Inf    1  -0.831
##  Two year old / Three year old                0.669 0.2190 Inf    1  -1.228
##  Two year old / Old                           0.818 0.2190 Inf    1  -0.749
##  Three year old / Old                         1.223 0.3130 Inf    1   0.786
##  p.value
##   1.0000
##   0.1541
##   0.1216
##   0.0005
##   <.0001
##   0.1423
##   0.1073
##   0.0007
##   <.0001
##   1.0000
##   0.7597
##   0.9618
##   0.8233
##   0.9757
##   0.9700
## 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## Tests are performed on the log scale
#running each as I think all comparisons is a bit over the top and probably repeating things
cld_rich <- multcomp::cld(emm2, Letters = letters, adjust = "tukey")
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
colnames(cld_rich)
## [1] "Boundary_descriptor" "response"            "SE"                 
## [4] "df"                  "asymp.LCL"           "asymp.UCL"          
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = response)) +
  theme_bw()+
  geom_errorbar(aes(ymin = asymp.LCL, ymax =asymp.UCL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimated counts", x = "Boundary type", colour='Boundary type') +
    geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

#final model but looking at impact with season
m0<-glmer.nb(insect_counts ~ Boundary_descriptor+ Season  + log(`Flower count`) + (1|Site) , data = meta_data)
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: Negative Binomial(4.189)  ( log )
## Formula: insect_counts ~ Boundary_descriptor + Season + log(`Flower count`) +  
##     (1 | Site)
##    Data: meta_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##    1083.3    1113.3    -530.7    1061.3       102 
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.65210 -0.56579 -0.04722  0.34525  2.53329 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Site   (Intercept) 0.0656   0.2561  
## Number of obs: 113, groups:  Site, 44
## 
## Fixed effects:
##                                        Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.961640   0.884490  -1.087  0.27694    
## Boundary_descriptorOn-farm bare fence -0.009675   0.180223  -0.054  0.95719    
## Boundary_descriptorOne year old        0.641861   0.274155   2.341  0.01922 *  
## Boundary_descriptorTwo year old        0.719073   0.272686   2.637  0.00836 ** 
## Boundary_descriptorThree year old      1.102199   0.262669   4.196 2.71e-05 ***
## Boundary_descriptorOld                 0.901717   0.166948   5.401 6.62e-08 ***
## Season2022/23                         -0.859946   0.148472  -5.792 6.96e-09 ***
## Season2023/24                         -0.366372   0.143046  -2.561  0.01043 *  
## log(`Flower count`)                    0.893386   0.167241   5.342 9.20e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.055                                                  
## Bndry_dsOyo       -0.116  0.332                                           
## Bndry_dscrptrTwyo  0.157  0.344  0.284                                    
## Bndry_dscrptrThyo -0.183  0.301  0.306  0.266                             
## Bndry_dscrO       -0.124  0.521  0.328  0.357             0.372           
## Sesn2022/23        0.368  0.018  0.186 -0.153            -0.076           
## Sesn2023/24        0.255  0.092  0.225  0.083            -0.284           
## lg(`Flcnt`)       -0.986 -0.049  0.028 -0.214             0.131           
##                   Bndr_O S2022/ S2023/
## Bndry_dO-bf                           
## Bndry_dsOyo                           
## Bndry_dscrptrTwyo                     
## Bndry_dscrptrThyo                     
## Bndry_dscrO                           
## Sesn2022/23       -0.087              
## Sesn2023/24       -0.056  0.561       
## lg(`Flcnt`)        0.023 -0.436 -0.332
res<-simulateResiduals(m0)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.85403, p-value = 0.776
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
performance::check_overdispersion(m0)
## # Overdispersion test
## 
##  dispersion ratio = 0.854
##           p-value = 0.776
## No overdispersion detected.
#tbl_regression(m0) #still going to have to copy over so no point

#kable(summary(m0), format="html")
emm2 <- emmeans(m0, ~ Boundary_descriptor,type = "response", weights = "proportional")
pairs(emm2, adjust = "tukey")
##  contrast                                     ratio     SE  df null z.ratio
##  (Off-farm bare fence) / (On-farm bare fence) 1.010 0.1820 Inf    1   0.054
##  (Off-farm bare fence) / One year old         0.526 0.1440 Inf    1  -2.341
##  (Off-farm bare fence) / Two year old         0.487 0.1330 Inf    1  -2.637
##  (Off-farm bare fence) / Three year old       0.332 0.0872 Inf    1  -4.196
##  (Off-farm bare fence) / Old                  0.406 0.0678 Inf    1  -5.401
##  (On-farm bare fence) / One year old          0.521 0.1430 Inf    1  -2.381
##  (On-farm bare fence) / Two year old          0.483 0.1300 Inf    1  -2.696
##  (On-farm bare fence) / Three year old        0.329 0.0889 Inf    1  -4.116
##  (On-farm bare fence) / Old                   0.402 0.0685 Inf    1  -5.351
##  One year old / Two year old                  0.926 0.3030 Inf    1  -0.236
##  One year old / Three year old                0.631 0.2000 Inf    1  -1.455
##  One year old / Old                           0.771 0.2080 Inf    1  -0.962
##  Two year old / Three year old                0.682 0.2210 Inf    1  -1.181
##  Two year old / Old                           0.833 0.2200 Inf    1  -0.692
##  Three year old / Old                         1.222 0.3100 Inf    1   0.791
##  p.value
##   1.0000
##   0.1776
##   0.0885
##   0.0004
##   <.0001
##   0.1627
##   0.0759
##   0.0006
##   <.0001
##   0.9999
##   0.6933
##   0.9300
##   0.8463
##   0.9829
##   0.9691
## 
## Results are averaged over the levels of: Season 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## Tests are performed on the log scale
cld_rich <- multcomp::cld(emm2, Letters = letters)
colnames(cld_rich)
## [1] "Boundary_descriptor" "response"            "SE"                 
## [4] "df"                  "asymp.LCL"           "asymp.UCL"          
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = response)) +
  theme_bw()+
  geom_errorbar(aes(ymin = asymp.LCL, ymax =asymp.UCL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimated counts", x = "Boundary type", colour='Boundary type') +
    geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

emm2 <- emmeans(m0, ~ Season,type = "response",  weights = "proportional")
pairs(emm2, adjust = "tukey")
##  contrast              ratio     SE  df null z.ratio p.value
##  (2021/22) / (2022/23)  2.36 0.3510 Inf    1   5.792  <.0001
##  (2021/22) / (2023/24)  1.44 0.2060 Inf    1   2.561  0.0281
##  (2022/23) / (2023/24)  0.61 0.0834 Inf    1  -3.613  0.0009
## 
## Results are averaged over the levels of: Boundary_descriptor 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale
#pairs(emm2,  adjust = "tukey")

Alpha diversity

Species count using species count Im switching to raw counts here, one issue is that flower number varies so need to check the relationship between diversity and flower number

#Alpha diversity
#one matrix of the counts
#one metadata file

#species accumulation curve
speccurve<-specaccum(counts_wide,method='random',permutations=1000)
plot(speccurve)

#count species per property
sppr <- specnumber(counts_wide)
head(sppr)
## Off-farm bare fence Ahuriri 2021/22        One year old Ahuriri 2021/22 
##                                   4                                   5 
## Off-farm bare fence Ahuriri 2022/23        Two year old Ahuriri 2022/23 
##                                   3                                   1 
## Off-farm bare fence Ahuriri 2023/24      Three year old Ahuriri 2023/24 
##                                   5                                  15
#can plot that out
sppr_df <- sppr %>% 
  enframe() %>% 
  full_join(meta_data, by = c("name" = "unique_sample"))

sppr_df$Boundary_descriptor <- factor(sppr_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

#just boundary
plot_sppr <- ggplot(sppr_df, aes(x = Boundary_descriptor, y = value, fill = Boundary_descriptor)) +
  geom_boxplot() +
  theme_bw()+
  scale_fill_manual(values=col_bound)+
  labs(x = "Boundary type",
       y = "Number of species per site",
       title = "Species richness: species count",
       fill = "Boundary type")
plot_sppr

#other significant factor was season
plot_sppr <- ggplot(sppr_df, aes(x = Boundary_descriptor, y = value, fill = Boundary_descriptor)) +
  geom_boxplot() +
facet_wrap(~Season,scales="free")+
    scale_fill_manual(values=col_bound)+
  theme_bw()+
  labs(x = "Boundary type",
       y = "Number of species per site",
       title = "Species richness: species count",
       fill = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#flower number:
plot_sppr <- ggplot(sppr_df, aes(x = `Flower count`, y = value)) +
  geom_point(aes(colour = Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
  stat_poly_line() +
  stat_poly_eq() +
   theme_bw()+
  labs(x = "Flower Number",
       y = "Number of species per site",
       title = "Species richness: species count", colour = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

####season as a random effect
#date split by month (early, mid, late) and farm name
m0<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season), data = meta_data)
m1<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Farm_name) , data = meta_data)
m2<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Collection_month) + (1|Farm_name), data = meta_data)
m3<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|month_split) + (1|Farm_name), data = meta_data)
m4<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|month_split) + (1|Site), data = meta_data)
m5<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Collection_month) + (1|Site), data = meta_data)
m6<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data)

AIC(m0,m1,m2,m3,m4,m5,m6)
##    df      AIC
## m0  9 586.8866
## m1 10 588.0214
## m2 11 586.8707
## m3 11 586.8820
## m4 11 587.4566
## m5 11 587.8580
## m6 10 588.6524
anova(m0,m1,m2,m3,m4,m5,m6)
## refitting model(s) with ML (instead of REML)
## Data: meta_data
## Models:
## m0: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season)
## m1: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Farm_name)
## m6: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Site)
## m2: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Collection_month) + (1 | Farm_name)
## m3: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | month_split) + (1 | Farm_name)
## m4: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | month_split) + (1 | Site)
## m5: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Collection_month) + (1 | Site)
##    npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)  
## m0    9 599.55 624.09 -290.77    581.55                       
## m1   10 600.61 627.89 -290.31    580.61 0.9335  1    0.33395  
## m6   10 601.45 628.72 -290.73    581.45 0.0000  0             
## m2   11 599.50 629.50 -288.75    577.50 3.9466  1    0.04697 *
## m3   11 598.92 628.92 -288.46    576.92 0.5800  0             
## m4   11 599.69 629.69 -288.84    577.69 0.0000  0             
## m5   11 600.75 630.75 -289.37    578.75 0.0000  0             
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#this is all really marginal so be honest

#I think for the paper site needs to go in as a random effect so I've settled on: 
#month is marginal and difficult to work out the right cuts offs without ending up right down the bottom for our group size
#month_split + farm name + season
m3<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data)
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

#its right hand skewed so trying glmer
m0<-lmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data)
m1<-glmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data,family = poisson)
m2<-glmer.nb(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data)
## Warning in theta.ml(Y, mu, weights = object@resp$weights, limit = limit, :
## iteration limit reached
AIC(m0,m1,m2)
##    df      AIC
## m0 10 588.6524
## m1  9 582.3194
## m2 10 584.0241
anova(m0,m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: meta_data
## Models:
## m1: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Site)
## m0: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Site)
## m2: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Site)
##    npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)
## m1    9 582.32 606.87 -282.16    564.32                     
## m0   10 601.45 628.72 -290.73    581.45  0.000  1          1
## m2   10 584.02 611.30 -282.01    564.02 17.426  0
res<-simulateResiduals(m1)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0716, p-value = 0.616
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
#final model
m1<-glmer(sppr ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data,family = poisson)
summary(m1)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: sppr ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) +  
##     (1 | Site)
##    Data: meta_data
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     582.3     606.9    -282.2     564.3       104 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2097 -0.6729 -0.1605  0.6407  2.9206 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Site   (Intercept) 0.006427 0.08017 
##  Season (Intercept) 0.017443 0.13207 
## Number of obs: 113, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.42997    0.57046  -0.754   0.4510    
## Boundary_descriptorOn-farm bare fence  0.07082    0.11217   0.631   0.5278    
## Boundary_descriptorOne year old        0.37965    0.17237   2.203   0.0276 *  
## Boundary_descriptorTwo year old        0.35007    0.16790   2.085   0.0371 *  
## Boundary_descriptorThree year old      0.71012    0.14071   5.047 4.50e-07 ***
## Boundary_descriptorOld                 0.58012    0.09574   6.060 1.36e-09 ***
## log(`Flower count`)                    0.40850    0.10175   4.015 5.95e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.102                                                  
## Bndry_dsOyo       -0.095  0.329                                           
## Bndry_dscrptrTwyo  0.052  0.353  0.206                                    
## Bndry_dscrptrThyo -0.188  0.360  0.248  0.208                             
## Bndry_dscrO       -0.174  0.575  0.373  0.386             0.455           
## lg(`Flcnt`)       -0.981  0.004  0.031 -0.123             0.118           
##                   Bndr_O
## Bndry_dO-bf             
## Bndry_dsOyo             
## Bndry_dscrptrTwyo       
## Bndry_dscrptrThyo       
## Bndry_dscrO             
## lg(`Flcnt`)        0.062
#back transform to species count
emm1 <- emmeans(m1, ~ Boundary_descriptor, type = "response") 
#pairwise test
pairs(emm1,  adjust = "tukey")
##  contrast                                     ratio     SE  df null z.ratio
##  (Off-farm bare fence) / (On-farm bare fence) 0.932 0.1040 Inf    1  -0.631
##  (Off-farm bare fence) / One year old         0.684 0.1180 Inf    1  -2.203
##  (Off-farm bare fence) / Two year old         0.705 0.1180 Inf    1  -2.085
##  (Off-farm bare fence) / Three year old       0.492 0.0692 Inf    1  -5.047
##  (Off-farm bare fence) / Old                  0.560 0.0536 Inf    1  -6.060
##  (On-farm bare fence) / One year old          0.734 0.1260 Inf    1  -1.796
##  (On-farm bare fence) / Two year old          0.756 0.1250 Inf    1  -1.685
##  (On-farm bare fence) / Three year old        0.528 0.0765 Inf    1  -4.410
##  (On-farm bare fence) / Old                   0.601 0.0583 Inf    1  -5.251
##  One year old / Two year old                  1.030 0.2210 Inf    1   0.138
##  One year old / Three year old                0.719 0.1390 Inf    1  -1.707
##  One year old / Old                           0.818 0.1330 Inf    1  -1.230
##  Two year old / Three year old                0.698 0.1360 Inf    1  -1.843
##  Two year old / Old                           0.794 0.1260 Inf    1  -1.456
##  Three year old / Old                         1.139 0.1470 Inf    1   1.006
##  p.value
##   0.9887
##   0.2364
##   0.2950
##   <.0001
##   <.0001
##   0.4685
##   0.5415
##   0.0002
##   <.0001
##   1.0000
##   0.5271
##   0.8222
##   0.4378
##   0.6922
##   0.9161
## 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## Tests are performed on the log scale
#pairs(emm1, by='Boundary_descriptor', adjust = "tukey")
#running each as I think all comparisons is a bit over the top and probably repeating things
cld_rich <- multcomp::cld(emm1, Letters = letters, adjust = "tukey")
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
colnames(cld_rich)
## [1] "Boundary_descriptor" "rate"                "SE"                 
## [4] "df"                  "asymp.LCL"           "asymp.UCL"          
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = rate)) +
  theme_bw()+
  geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimate species richness", x = "Boundary type", colour='Boundary type') +
    geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

#final model but looking at impact with season
m0<-glmer(sppr ~ Boundary_descriptor + Season + log(`Flower count`) + (1|Site), data = meta_data,family = poisson,control=glmerControl(optimizer="bobyqa",optCtrl=list(maxfun=2e5)))
res<-simulateResiduals(m0)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 1.0638, p-value = 0.576
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = 0, p-value = 1
## alternative hypothesis: two.sided
summary(m0)
## Generalized linear mixed model fit by maximum likelihood (Laplace
##   Approximation) [glmerMod]
##  Family: poisson  ( log )
## Formula: sppr ~ Boundary_descriptor + Season + log(`Flower count`) + (1 |  
##     Site)
##    Data: meta_data
## Control: glmerControl(optimizer = "bobyqa", optCtrl = list(maxfun = 2e+05))
## 
##       AIC       BIC    logLik -2*log(L)  df.resid 
##     575.7     603.0    -277.8     555.7       103 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2079 -0.6527 -0.2140  0.6734  3.0904 
## 
## Random effects:
##  Groups Name        Variance Std.Dev.
##  Site   (Intercept) 0.005327 0.07298 
## Number of obs: 113, groups:  Site, 44
## 
## Fixed effects:
##                                       Estimate Std. Error z value Pr(>|z|)    
## (Intercept)                           -0.46604    0.55340  -0.842   0.3997    
## Boundary_descriptorOn-farm bare fence  0.07767    0.11134   0.698   0.4854    
## Boundary_descriptorOne year old        0.37562    0.17333   2.167   0.0302 *  
## Boundary_descriptorTwo year old        0.39251    0.16622   2.361   0.0182 *  
## Boundary_descriptorThree year old      0.68909    0.13992   4.925 8.44e-07 ***
## Boundary_descriptorOld                 0.58296    0.09490   6.143 8.10e-10 ***
## Season2022/23                         -0.23906    0.10214  -2.341   0.0193 *  
## Season2023/24                          0.12307    0.09263   1.329   0.1840    
## log(`Flower count`)                    0.42131    0.10320   4.083 4.45e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.103                                                  
## Bndry_dsOyo       -0.104  0.328                                           
## Bndry_dscrptrTwyo  0.071  0.350  0.196                                    
## Bndry_dscrptrThyo -0.201  0.361  0.223  0.217                             
## Bndry_dscrO       -0.180  0.575  0.368  0.385             0.459           
## Sesn2022/23        0.302 -0.009  0.229 -0.230            -0.063           
## Sesn2023/24        0.218  0.073  0.276  0.059            -0.289           
## lg(`Flcnt`)       -0.985  0.000  0.009 -0.128             0.144           
##                   Bndr_O S2022/ S2023/
## Bndry_dO-bf                           
## Bndry_dsOyo                           
## Bndry_dscrptrTwyo                     
## Bndry_dscrptrThyo                     
## Bndry_dscrO                           
## Sesn2022/23       -0.042              
## Sesn2023/24       -0.011  0.573       
## lg(`Flcnt`)        0.066 -0.382 -0.312
#tbl_regression(m0) #still going to have to copy over so no point

#kable(summary(m0), format="html")
emm2 <- emmeans(m0, ~ Boundary_descriptor,type = "response",weights = "proportional")
pairs(emm2,  adjust = "tukey")
##  contrast                                     ratio     SE  df null z.ratio
##  (Off-farm bare fence) / (On-farm bare fence) 0.925 0.1030 Inf    1  -0.698
##  (Off-farm bare fence) / One year old         0.687 0.1190 Inf    1  -2.167
##  (Off-farm bare fence) / Two year old         0.675 0.1120 Inf    1  -2.361
##  (Off-farm bare fence) / Three year old       0.502 0.0702 Inf    1  -4.925
##  (Off-farm bare fence) / Old                  0.558 0.0530 Inf    1  -6.143
##  (On-farm bare fence) / One year old          0.742 0.1280 Inf    1  -1.727
##  (On-farm bare fence) / Two year old          0.730 0.1200 Inf    1  -1.913
##  (On-farm bare fence) / Three year old        0.543 0.0781 Inf    1  -4.247
##  (On-farm bare fence) / Old                   0.603 0.0580 Inf    1  -5.254
##  One year old / Two year old                  0.983 0.2120 Inf    1  -0.078
##  One year old / Three year old                0.731 0.1440 Inf    1  -1.591
##  One year old / Old                           0.813 0.1330 Inf    1  -1.263
##  Two year old / Three year old                0.743 0.1430 Inf    1  -1.539
##  Two year old / Old                           0.827 0.1290 Inf    1  -1.217
##  Three year old / Old                         1.112 0.1420 Inf    1   0.829
##  p.value
##   0.9823
##   0.2532
##   0.1700
##   <.0001
##   <.0001
##   0.5137
##   0.3941
##   0.0003
##   <.0001
##   1.0000
##   0.6042
##   0.8052
##   0.6386
##   0.8286
##   0.9623
## 
## Results are averaged over the levels of: Season 
## P value adjustment: tukey method for comparing a family of 6 estimates 
## Tests are performed on the log scale
cld_rich <- multcomp::cld(emm2, Letters = letters)
colnames(cld_rich)
## [1] "Boundary_descriptor" "rate"                "SE"                 
## [4] "df"                  "asymp.LCL"           "asymp.UCL"          
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = rate)) +
  theme_bw()+
  geom_errorbar(aes(ymin = asymp.LCL, ymax = asymp.UCL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimate species richness", x = "Boundary type", colour='Boundary type') +
  geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

#pairs(emm2,  adjust = "tukey")
emm2 <- emmeans(m0, ~ Season,type = "response", weights = "proportional")
pairs(emm2,  adjust = "tukey")
##  contrast              ratio     SE  df null z.ratio p.value
##  (2021/22) / (2022/23) 1.270 0.1300 Inf    1   2.341  0.0504
##  (2021/22) / (2023/24) 0.884 0.0819 Inf    1  -1.329  0.3792
##  (2022/23) / (2023/24) 0.696 0.0629 Inf    1  -4.005  0.0002
## 
## Results are averaged over the levels of: Boundary_descriptor 
## P value adjustment: tukey method for comparing a family of 3 estimates 
## Tests are performed on the log scale
#the significant pairs of boundary types are exactly the same between the full model and the anova

Alpha diversity shannon

Alpha diversity using shannon

shannondiv<-diversity(counts_wide)
head(shannondiv)
## Off-farm bare fence Ahuriri 2021/22        One year old Ahuriri 2021/22 
##                           1.0335621                           0.7261928 
## Off-farm bare fence Ahuriri 2022/23        Two year old Ahuriri 2022/23 
##                           1.0986123                           0.0000000 
## Off-farm bare fence Ahuriri 2023/24      Three year old Ahuriri 2023/24 
##                           1.1465958                           2.1730992
#can plot that out
shannondiv_df <- shannondiv %>% 
  enframe() %>% 
  full_join(meta_data, by = c("name" = "unique_sample"))

shannondiv_df$Boundary_descriptor <- factor(shannondiv_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)


#just boundary
plot_sppr <- ggplot(shannondiv_df, aes(x = Boundary_descriptor, y = value, fill = Boundary_descriptor)) +
  geom_boxplot() +
  theme_bw()+
  scale_fill_manual(values=col_bound)+
  labs(x = "Boundary type",
       y = "Number of species per site",
       title = "Species richness: shannon",
       fill = "Boundary type")
plot_sppr

#split by season
plot_sppr <- ggplot(shannondiv_df, aes(x = Boundary_descriptor, y = value, fill = Boundary_descriptor)) +
  geom_boxplot() +
  facet_wrap(~Season,scales="free")+
  theme_bw()+
  scale_fill_manual(values=col_bound)+
  labs(x = "Boundary type",
       y = "Number of species per site",
       title = "Species richness: shannon",
       fill = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

####season as random effect
m0<-lmer(shannondiv ~ Boundary_descriptor+ log(`Flower count`)+(1|Season), data = meta_data)
m1<-lmer(shannondiv ~ Boundary_descriptor+ log(`Flower count`)+(1|Season) + (1|Farm_name) , data = meta_data)
m2<-lmer(shannondiv ~ Boundary_descriptor+ log(`Flower count`)+(1|Season) + (1|month_split) , data = meta_data)
m3<-lmer(shannondiv ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|month_split) + (1|Farm_name), data = meta_data)
m4<-lmer(shannondiv ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|month_split) + (1|Site), data = meta_data)
m5<-lmer(shannondiv ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Collection_month) + (1|Site), data = meta_data)
m6<-lmer(shannondiv ~ Boundary_descriptor + log(`Flower count`)+(1|Season) + (1|Site), data = meta_data)

AIC(m0,m1,m2,m3,m4,m5,m6) 
##    df      AIC
## m0  9 164.2780
## m1 10 161.5071
## m2 10 159.0525
## m3 11 155.1511
## m4 11 154.8432
## m5 11 157.0905
## m6 10 161.3789
anova(m0,m1,m2,m3,m4,m5,m6)
## refitting model(s) with ML (instead of REML)
## Data: meta_data
## Models:
## m0: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season)
## m1: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Farm_name)
## m2: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | month_split)
## m6: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Site)
## m3: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | month_split) + (1 | Farm_name)
## m4: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | month_split) + (1 | Site)
## m5: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 | Season) + (1 | Collection_month) + (1 | Site)
##    npar    AIC    BIC  logLik -2*log(L)  Chisq Df Pr(>Chisq)   
## m0    9 149.11 173.66 -65.557    131.11                        
## m1   10 145.93 173.20 -62.963    125.93 5.1880  1   0.022744 * 
## m2   10 143.33 170.61 -61.667    123.33 2.5924  0              
## m6   10 146.66 173.94 -63.332    126.66 0.0000  0              
## m3   11 139.06 169.06 -58.527    117.06 9.6093  1   0.001936 **
## m4   11 139.72 169.72 -58.861    117.72 0.0000  0              
## m5   11 142.49 172.49 -60.246    120.49 0.0000  0              
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# site grouping....
#this feels right, there is little to no difference in outcome but site is better than farm and month is 

m3<-lmer(shannondiv ~ Boundary_descriptor + log(`Flower count`)+(1|Season)  + (1|Site), data = meta_data)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: shannondiv ~ Boundary_descriptor + log(`Flower count`) + (1 |  
##     Season) + (1 | Site)
##    Data: meta_data
## 
## REML criterion at convergence: 141.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.47732 -0.67525  0.02078  0.72373  2.02121 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.04462  0.2112  
##  Season   (Intercept) 0.03754  0.1938  
##  Residual             0.14472  0.3804  
## Number of obs: 113, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                                       Estimate Std. Error       df t value
## (Intercept)                           -1.04166    0.66090 91.64699  -1.576
## Boundary_descriptorOn-farm bare fence  0.04128    0.13711 41.30260   0.301
## Boundary_descriptorOne year old       -0.10870    0.20823 90.64249  -0.522
## Boundary_descriptorTwo year old        0.07262    0.20030 87.04817   0.363
## Boundary_descriptorThree year old      0.62029    0.19717 86.60267   3.146
## Boundary_descriptorOld                 0.24202    0.12782 39.58847   1.893
## log(`Flower count`)                    0.43846    0.11849 94.31857   3.700
##                                       Pr(>|t|)    
## (Intercept)                           0.118443    
## Boundary_descriptorOn-farm bare fence 0.764877    
## Boundary_descriptorOne year old       0.602938    
## Boundary_descriptorTwo year old       0.717804    
## Boundary_descriptorThree year old     0.002269 ** 
## Boundary_descriptorOld                0.065628 .  
## log(`Flower count`)                   0.000362 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.082                                                  
## Bndry_dsOyo       -0.084  0.330                                           
## Bndry_dscrptrTwyo  0.074  0.346  0.343                                    
## Bndry_dscrptrThyo -0.170  0.311  0.339  0.335                             
## Bndry_dscrO       -0.135  0.518  0.344  0.353             0.356           
## lg(`Flcnt`)       -0.975 -0.019  0.018 -0.148             0.105           
##                   Bndr_O
## Bndry_dO-bf             
## Bndry_dsOyo             
## Bndry_dscrptrTwyo       
## Bndry_dscrptrThyo       
## Bndry_dscrO             
## lg(`Flcnt`)        0.029
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.94456, p-value = 0.904
## alternative hypothesis: two.sided
#testZeroInflation(res) not relavent as boundary effects of shannon

emm2 <- emmeans(m3, ~ Boundary_descriptor,type = "response")
pairs(emm2,simple='each',  adjust = "tukey")
##  contrast                                     estimate    SE   df t.ratio
##  (Off-farm bare fence) - (On-farm bare fence)  -0.0413 0.138 39.9  -0.300
##  (Off-farm bare fence) - One year old           0.1087 0.211 90.0   0.516
##  (Off-farm bare fence) - Two year old          -0.0726 0.202 86.2  -0.359
##  (Off-farm bare fence) - Three year old        -0.6203 0.199 85.8  -3.115
##  (Off-farm bare fence) - Old                   -0.2420 0.128 38.2  -1.888
##  (On-farm bare fence) - One year old            0.1500 0.210 89.0   0.714
##  (On-farm bare fence) - Two year old           -0.0313 0.201 85.3  -0.156
##  (On-farm bare fence) - Three year old         -0.5790 0.205 86.3  -2.824
##  (On-farm bare fence) - Old                    -0.2007 0.131 38.7  -1.536
##  One year old - Two year old                   -0.1813 0.238 69.8  -0.762
##  One year old - Three year old                 -0.7290 0.239 71.2  -3.048
##  One year old - Old                            -0.3507 0.206 91.4  -1.705
##  Two year old - Three year old                 -0.5477 0.234 69.6  -2.337
##  Two year old - Old                            -0.1694 0.198 88.5  -0.858
##  Three year old - Old                           0.3783 0.195 86.9   1.937
##  p.value
##   0.9996
##   0.9954
##   0.9992
##   0.0291
##   0.4245
##   0.9797
##   1.0000
##   0.0631
##   0.6439
##   0.9729
##   0.0366
##   0.5321
##   0.1935
##   0.9554
##   0.3869
## 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 6 estimates
#running each as I think all comparisons is a bit over the top and probably repeating things
cld_rich <- multcomp::cld(emm2, Letters = letters, adjust = "tukey")
## Note: adjust = "tukey" was changed to "sidak"
## because "tukey" is only appropriate for one set of pairwise comparisons
colnames(cld_rich)
## [1] "Boundary_descriptor" "emmean"              "SE"                 
## [4] "df"                  "lower.CL"            "upper.CL"           
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = emmean)) +
#  facet_wrap(~Season)+
  theme_bw()+
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimated Shannon", x = "Boundary type", colour='Boundary type') +
    geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

#final model but looking at impact with season
m0<-lmer(shannondiv ~ Boundary_descriptor + Season + log(`Flower count`) + (1|Site), data = meta_data)
summary(m0)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: shannondiv ~ Boundary_descriptor + Season + log(`Flower count`) +  
##     (1 | Site)
##    Data: meta_data
## 
## REML criterion at convergence: 140.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.47339 -0.69683  0.03224  0.70990  2.07410 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.04404  0.2099  
##  Residual             0.14511  0.3809  
## Number of obs: 113, groups:  Site, 44
## 
## Fixed effects:
##                                       Estimate Std. Error       df t value
## (Intercept)                           -1.06695    0.63791 94.24944  -1.673
## Boundary_descriptorOn-farm bare fence  0.04637    0.13690 41.48428   0.339
## Boundary_descriptorOne year old       -0.10625    0.20903 90.73933  -0.508
## Boundary_descriptorTwo year old        0.09765    0.20096 87.34039   0.486
## Boundary_descriptorThree year old      0.60014    0.19792 87.05872   3.032
## Boundary_descriptorOld                 0.24356    0.12755 39.74100   1.909
## Season2022/23                         -0.21790    0.10456 70.77007  -2.084
## Season2023/24                          0.19287    0.10361 76.35340   1.861
## log(`Flower count`)                    0.44418    0.11977 92.41159   3.709
##                                       Pr(>|t|)    
## (Intercept)                           0.097730 .  
## Boundary_descriptorOn-farm bare fence 0.736540    
## Boundary_descriptorOne year old       0.612472    
## Boundary_descriptorTwo year old       0.628237    
## Boundary_descriptorThree year old     0.003197 ** 
## Boundary_descriptorOld                0.063441 .  
## Season2022/23                         0.040771 *  
## Season2023/24                         0.066524 .  
## log(`Flower count`)                   0.000355 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##                   (Intr) B_O-bf Bn_Oyo Bndry_dscrptrTwyo Bndry_dscrptrThyo
## Bndry_dO-bf       -0.088                                                  
## Bndry_dsOyo       -0.099  0.330                                           
## Bndry_dscrptrTwyo  0.077  0.346  0.337                                    
## Bndry_dscrptrThyo -0.165  0.306  0.327  0.324                             
## Bndry_dscrO       -0.140  0.518  0.342  0.352             0.353           
## Sesn2022/23        0.303  0.008  0.212 -0.170            -0.041           
## Sesn2023/24        0.204  0.083  0.246  0.073            -0.250           
## lg(`Flcnt`)       -0.984 -0.021  0.005 -0.140             0.109           
##                   Bndr_O S2022/ S2023/
## Bndry_dO-bf                           
## Bndry_dsOyo                           
## Bndry_dscrptrTwyo                     
## Bndry_dscrptrThyo                     
## Bndry_dscrO                           
## Sesn2022/23       -0.011              
## Sesn2023/24        0.014  0.542       
## lg(`Flcnt`)        0.029 -0.379 -0.289
qqnorm(residuals(m0))
qqline(residuals(m0))

plot(fitted(m0), residuals(m0))

res<-simulateResiduals(m0)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.9277, p-value = 0.624
## alternative hypothesis: two.sided
#tbl_regression(m0) #still going to have to copy over so no point

#kable(summary(m0), format="html")
emm2 <- emmeans(m0, ~ Boundary_descriptor,type = "response", weights = "proportional")
pairs(emm2,  adjust = "tukey")
##  contrast                                     estimate    SE   df t.ratio
##  (Off-farm bare fence) - (On-farm bare fence)  -0.0464 0.137 39.9  -0.338
##  (Off-farm bare fence) - One year old           0.1063 0.210 90.0   0.507
##  (Off-farm bare fence) - Two year old          -0.0976 0.201 86.4  -0.485
##  (Off-farm bare fence) - Three year old        -0.6001 0.198 86.2  -3.030
##  (Off-farm bare fence) - Old                   -0.2436 0.128 38.2  -1.905
##  (On-farm bare fence) - One year old            0.1526 0.209 89.1   0.730
##  (On-farm bare fence) - Two year old           -0.0513 0.200 85.6  -0.256
##  (On-farm bare fence) - Three year old         -0.5538 0.204 86.7  -2.721
##  (On-farm bare fence) - Old                    -0.1972 0.130 38.7  -1.512
##  One year old - Two year old                   -0.2039 0.237 68.2  -0.862
##  One year old - Three year old                 -0.7064 0.237 69.5  -2.984
##  One year old - Old                            -0.3498 0.205 91.5  -1.709
##  Two year old - Three year old                 -0.5025 0.232 68.1  -2.164
##  Two year old - Old                            -0.1459 0.197 88.6  -0.742
##  Three year old - Old                           0.3566 0.194 87.2   1.837
##  p.value
##   0.9994
##   0.9958
##   0.9966
##   0.0368
##   0.4150
##   0.9777
##   0.9998
##   0.0813
##   0.6585
##   0.9541
##   0.0435
##   0.5292
##   0.2679
##   0.9760
##   0.4476
## 
## Results are averaged over the levels of: Season 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 6 estimates
cld_rich <- multcomp::cld(emm2, Letters = letters)
colnames(cld_rich)
## [1] "Boundary_descriptor" "emmean"              "SE"                 
## [4] "df"                  "lower.CL"            "upper.CL"           
## [7] ".group"
cld_rich$Boundary_descriptor <- factor(cld_rich$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

ggplot(cld_rich, aes(x = Boundary_descriptor, y = emmean)) +
  theme_bw()+
  geom_errorbar(aes(ymin = lower.CL, ymax = upper.CL), width = 0.2) +
  geom_point(size = 3,aes(colour=Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
# not sure if groupings will make sense with facet wrap
#  geom_text(aes(label = .group, y = asymp.UCL + 0.2), vjust = 0, size = 5) +
  labs(y = "Estimated Shannon", x = "Boundary type", colour='Boundary type') +
    geom_text(aes(label = trimws(.group)),  hjust = -0.75) +
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

#pairs(emm2,  adjust = "tukey")
emm2 <- emmeans(m0, ~ Season,type = "response", weights = "proportional")
pairs(emm2,  adjust = "tukey")
##  contrast              estimate    SE   df t.ratio p.value
##  (2021/22) - (2022/23)    0.218 0.105 69.3   2.081  0.1013
##  (2021/22) - (2023/24)   -0.193 0.104 75.1  -1.855  0.1592
##  (2022/23) - (2023/24)   -0.411 0.100 75.5  -4.108  0.0003
## 
## Results are averaged over the levels of: Boundary_descriptor 
## Degrees-of-freedom method: kenward-roger 
## P value adjustment: tukey method for comparing a family of 3 estimates

Using renyi profiles

Tracking the difference between the two communities using a renyi profile method. We can use this split by season in the paper as that gets around the random effects of sites and season.

The Renyi diversity profile at scale 0 reflects species richness, at scale 1 the Shannon index, at scale 2 the Simpson index and at scale Inf the Berger-Parker index (the dominance of the most abundant species):

If the profiles cross over then it may not be possible to compare diversity. I find this interesting as its the old samples that cross over. The 3 year olds don’t. This follows the above shannon/simpson analysis that shows evenness is highest in the 3-year old samples. We cant actually include this in the final paper as its not taking into account flower number/farm/date sampled like the models above are. But I wanted to get your thoughts on it as I havent seen data like this. Though the fly one was similar when simpson was so high for apple as there was basically no diversity at apple so it was perfectly even. We should probably include the accumulation plot in the supplementary though (along with the innext plots).

Im thinking that this suggests a lot more rare species are coming into the plantings compared to the bare fence. So the evenness becomes quite lower compared to the bare fence/control plantings. Which feeds into that science paper we discussed the other day about bee species rather than bee number being important.

meta_data$Boundary_descriptorf<-factor(meta_data$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Two year old","Three year old","Old"))
meta_data_bior<-as.data.frame(meta_data)
Renyi.1<-renyicomp(counts_wide,y=meta_data_bior,factor='Boundary_descriptorf', 
  scales=c(0, 0.25, 0.5, 1, 2, 4, 8, Inf), permutations=1000, plotit=F)
## Set of permutations < 'minperm'. Generating entire set.
## Set of permutations < 'minperm'. Generating entire set.
renyi.long1 <- renyicomp.long(Renyi.1, label.freq=1)
renyi.long1_dataframe<-data.frame(renyi.long1$labelit,renyi.long1$Scales,renyi.long1$Obs,renyi.long1$Diversity,renyi.long1$UPR,renyi.long1$LWR,renyi.long1$Grouping)
renyi.long1_dataframe$Boundary_descriptor<-factor(renyi.long1_dataframe$renyi.long1.Grouping, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Two year old","Three year old","Old"))

pl_renyi_across <- ggplot(data=renyi.long1_dataframe, aes(x=renyi.long1.Scales, y=renyi.long1.Diversity, ymax=renyi.long1.UPR, ymin=renyi.long1.LWR)) + 
    scale_x_discrete() +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    geom_line(data=renyi.long1_dataframe, 
              aes(x=renyi.long1.Obs, colour=Boundary_descriptor), 
              linewidth=2) +
    geom_point(data=subset(renyi.long1_dataframe, renyi.long1.labelit==TRUE), 
        aes(colour=Boundary_descriptor, shape=Boundary_descriptor), 
        size=5) +
    geom_ribbon(data=renyi.long1_dataframe, 
                aes(x=renyi.long1.Obs, colour=Boundary_descriptor, fill=after_scale(alpha(colour, 0.1))), 
                show.legend=FALSE) + 
     scale_colour_manual(values=col_bound)+
    theme_bw() +
    #scale_colour_npg() +
    labs(x=expression(alpha), y = "Diversity", colour = "Boundary type", shape = "Boundary type")
pl_renyi_across
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

#not great but that is conflated with the effect of season so break it up by season:
#meta_data_bior_season<-meta_data_bior %>% filter(Season=='2023/24')
#meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())

#counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
#counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
#Renyi.1<-renyicomp(counts_wide_filt,y=meta_data_bior_season,factor='Boundary_descriptorf', 
#  scales=c(0, 0.25, 0.5, 1, 2, 4, 8, Inf), permutations=1000, plotit=F)
#renyi.long1 <- renyicomp.long(Renyi.1, label.freq=1)
#renyi.long1_dataframe<-data.frame(renyi.long1$labelit,renyi.long1$Scales,renyi.long1$Obs,renyi.long1$Diversity,renyi.long1$UPR,renyi.long1$LWR,renyi.long1$Grouping)
#renyi.long1_dataframe$Boundary_descriptor<-factor(renyi.long1_dataframe$renyi.long1.Grouping, levels = c('Control farm','Bare fence',"One year old","Two year old","Three year old","Old"))

#pl_renyi_across <- ggplot(data=renyi.long1_dataframe, aes(x=renyi.long1.Scales, y=renyi.long1.Diversity, ymax=renyi.long1.UPR, ymin=renyi.long1.LWR)) + 
#    scale_x_discrete() +
#    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
#    geom_line(data=renyi.long1_dataframe, 
#              aes(x=renyi.long1.Obs, colour=Boundary_descriptor), 
#              linewidth=2) +
#    geom_point(data=subset(renyi.long1_dataframe, renyi.long1.labelit==TRUE), 
#        aes(colour=Boundary_descriptor, shape=Boundary_descriptor), 
#        size=5) +
#    geom_ribbon(data=renyi.long1_dataframe, 
#                aes(x=renyi.long1.Obs, colour=Boundary_descriptor, fill=after_scale(alpha(colour, 0.1))), 
#                show.legend=FALSE) + 
#     scale_colour_manual(values=col_bound)+
#    theme_bw() +
#    labs(x=expression(alpha), y = "Diversity", colour = "Boundary type", shape = "Boundary type",title='Season 2023/24')
#pl_renyi_across


#2023/24 but also dropping the one year old and two year old that only have one replicate
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2023/24') #%>% filter(Boundary_descriptor!='One year old')%>% filter(Boundary_descriptor!='Two year old')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence    11
## 2 On-farm bare fence      7
## 3 Three year old          7
## 4 Old                    13
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence    11
## 2 On-farm bare fence      7
## 3 Three year old          7
## 4 Old                    13
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels =c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
Renyi.1<-renyicomp(counts_wide_filt,y=meta_data_bior_season,factor='Boundary_descriptorf', 
  scales=c(0, 0.25, 0.5, 1, 2, 4, 8, Inf), permutations=1000, plotit=F)
renyi.long1 <- renyicomp.long(Renyi.1, label.freq=1)
renyi.long1_dataframe<-data.frame(renyi.long1$labelit,renyi.long1$Scales,renyi.long1$Obs,renyi.long1$Diversity,renyi.long1$UPR,renyi.long1$LWR,renyi.long1$Grouping)
renyi.long1_dataframe$Boundary_descriptor<-factor(renyi.long1_dataframe$renyi.long1.Grouping, levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))

col_bound_cut<-c('chocolate','#FDE725FF','#3B528BFF',"#440154FF")


pl_renyi_across <- ggplot(data=renyi.long1_dataframe, aes(x=renyi.long1.Scales, y=renyi.long1.Diversity, ymax=renyi.long1.UPR, ymin=renyi.long1.LWR)) + 
    scale_x_discrete() +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    geom_line(data=renyi.long1_dataframe, 
              aes(x=renyi.long1.Obs, colour=Boundary_descriptor), 
              linewidth=2) +
    geom_point(data=subset(renyi.long1_dataframe, renyi.long1.labelit==TRUE), 
        aes(colour=Boundary_descriptor, shape=Boundary_descriptor), 
        size=5) +
    geom_ribbon(data=renyi.long1_dataframe, 
                aes(x=renyi.long1.Obs, colour=Boundary_descriptor, fill=after_scale(alpha(colour, 0.1))), 
                show.legend=FALSE) + 
     scale_colour_manual(values=col_bound_cut)+
    theme_bw() +
    #scale_colour_npg() +
    labs(x=expression(alpha), y = "Diversity", colour = "Boundary type", shape = "Boundary type",title='Season 2023/24')
pl_renyi_across
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

#2022/23
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2022/23')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 Two year old            7
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
Renyi.1<-renyicomp(counts_wide_filt,y=meta_data_bior_season,factor='Boundary_descriptorf', 
  scales=c(0, 0.25, 0.5, 1, 2, 4, 8, Inf), permutations=1000, plotit=F)
renyi.long1 <- renyicomp.long(Renyi.1, label.freq=1)
renyi.long1_dataframe<-data.frame(renyi.long1$labelit,renyi.long1$Scales,renyi.long1$Obs,renyi.long1$Diversity,renyi.long1$UPR,renyi.long1$LWR,renyi.long1$Grouping)
renyi.long1_dataframe$Boundary_descriptor<-factor(renyi.long1_dataframe$renyi.long1.Grouping, levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"))
col_bound_cut<-c('chocolate','#FDE725FF',"#21908CFF","#440154FF")
pl_renyi_across <- ggplot(data=renyi.long1_dataframe, aes(x=renyi.long1.Scales, y=renyi.long1.Diversity, ymax=renyi.long1.UPR, ymin=renyi.long1.LWR)) + 
    scale_x_discrete() +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    geom_line(data=renyi.long1_dataframe, 
              aes(x=renyi.long1.Obs, colour=Boundary_descriptor), 
              linewidth=2) +
    geom_point(data=subset(renyi.long1_dataframe, renyi.long1.labelit==TRUE), 
        aes(colour=Boundary_descriptor, shape=Boundary_descriptor), 
        size=5) +
    geom_ribbon(data=renyi.long1_dataframe, 
                aes(x=renyi.long1.Obs, colour=Boundary_descriptor, fill=after_scale(alpha(colour, 0.1))), 
                show.legend=FALSE) + 
     scale_colour_manual(values=col_bound_cut)+
    theme_bw() +
    #scale_colour_npg() +
    labs(x=expression(alpha), y = "Diversity", colour = "Boundary type", shape = "Boundary type",title='Season 2022/23')
pl_renyi_across
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

#2021/22
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2021/22')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 One year old            6
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
Renyi.1<-renyicomp(counts_wide_filt,y=meta_data_bior_season,factor='Boundary_descriptorf', 
  scales=c(0, 0.25, 0.5, 1, 2, 4, 8, Inf), permutations=1000, plotit=F)
renyi.long1 <- renyicomp.long(Renyi.1, label.freq=1)
renyi.long1_dataframe<-data.frame(renyi.long1$labelit,renyi.long1$Scales,renyi.long1$Obs,renyi.long1$Diversity,renyi.long1$UPR,renyi.long1$LWR,renyi.long1$Grouping)
renyi.long1_dataframe$Boundary_descriptor<-factor(renyi.long1_dataframe$renyi.long1.Grouping, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Old"))
col_bound_cut<-c('chocolate','#FDE725FF',"#5DC863FF","#440154FF")

pl_renyi_across <- ggplot(data=renyi.long1_dataframe, aes(x=renyi.long1.Scales, y=renyi.long1.Diversity, ymax=renyi.long1.UPR, ymin=renyi.long1.LWR)) + 
    scale_x_discrete() +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    geom_line(data=renyi.long1_dataframe, 
              aes(x=renyi.long1.Obs, colour=Boundary_descriptor), 
              linewidth=2) +
    geom_point(data=subset(renyi.long1_dataframe, renyi.long1.labelit==TRUE), 
        aes(colour=Boundary_descriptor, shape=Boundary_descriptor), 
        size=5) +
    geom_ribbon(data=renyi.long1_dataframe, 
                aes(x=renyi.long1.Obs, colour=Boundary_descriptor, fill=after_scale(alpha(colour, 0.1))), 
                show.legend=FALSE) + 
     scale_colour_manual(values=col_bound_cut)+
    theme_bw() +
    #scale_colour_npg() +
    labs(x=expression(alpha), y = "Diversity", colour = "Boundary type", shape = "Boundary type",title='Season 2021/22')
pl_renyi_across
## Warning in max(ids, na.rm = TRUE): no non-missing arguments to max; returning
## -Inf

#hmmm pattern holds across seasons, I thought maybe that it would be one season throwing everyone out

#just having a look at the accum results:
Accum.1 <- accumcomp(counts_wide,y=meta_data_bior,factor='Boundary_descriptorf', 
    method='exact', conditioned=FALSE, plotit=FALSE)
accum.long1 <- accumcomp.long(Accum.1, ci=NA, label.freq=5)
accum.long1_dataframe<-data.frame(accum.long1$Grouping,accum.long1$Obs,accum.long1$Sites,accum.long1$Richness,accum.long1$LWR,accum.long1$UPR,accum.long1$labelit)
accum.long1_dataframe$Boundary_descriptor<-factor(accum.long1_dataframe$accum.long1.Grouping, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Two year old","Three year old","Old"))


plotgg1 <- ggplot(data=accum.long1_dataframe, aes(x = accum.long1.Sites, y = accum.long1.Richness, ymax = accum.long1.UPR, ymin = accum.long1.LWR)) + 
    scale_x_continuous(expand=c(0, 1), sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    geom_line(aes(colour=Boundary_descriptor), linewidth=2) +
    geom_point(data=subset(accum.long1_dataframe, accum.long1.labelit==TRUE), 
               aes(colour=Boundary_descriptor, shape=Boundary_descriptor), size=5) +
    geom_ribbon(aes(colour=Boundary_descriptor), alpha=0.2, show.legend=FALSE) + 
     scale_colour_manual(values=col_bound)+
    theme_bw() +
    labs(x = "Sites", y = "Species", colour = "Boundary type", shape = "Boundary type")
plotgg1

#

Understanding how alpha diversity and seed set compare

Using Heathers sheet that she created of average seed set at each site. File: ‘pc_seed_data_summarised_cleaned.csv’ for average seed set.

seedset<-read.csv('C:/Users/hrlexd/Dropbox/PlantAndFood (1)/B4BI/Pakchoi/pc_seed_data_summarised_cleaned.csv')
colnames(seedset)
## [1] "Year"                "Farm"                "Type_of_farm"       
## [4] "Simplified_Boundary" "Native_new_age"      "Treatment"          
## [7] "Mean_seeds_viable"
#tidy up some names etc so I can merge across
#unique(seedset$Native_new_age)
seedset$Native_new_age<-gsub('One-year old native','One year old',seedset$Native_new_age)
seedset$Native_new_age<-gsub('Two-year old native','Two year old',seedset$Native_new_age)
seedset$Native_new_age<-gsub('Three-year old native','Three year old',seedset$Native_new_age)
seedset$Native_new_age<-gsub('Old native','Old',seedset$Native_new_age)
#unique(seedset$Year)
#unique(meta_data$Season)
seedset$Year<-gsub('2021-2022','2021/22',seedset$Year)
seedset$Year<-gsub('2022-2023','2022/23',seedset$Year)
seedset$Year<-gsub('2023-2024','2023/24',seedset$Year)
#colnames(meta_data)
#colnames(seedset)
seedset$Farm<-gsub('Ashford 1','Ashford_1',seedset$Farm)
seedset$Farm<-gsub('Lochan Mor','Lochan_Mor',seedset$Farm)
seedset$Farm<-gsub('Te Maania','Te_maania',seedset$Farm)
seedset$Farm<-gsub('Terrace View','Terrace_view',seedset$Farm)
seedset$Farm<-gsub('Nutpoint','Nut_Point',seedset$Farm)
seedset_meta<-left_join(seedset,meta_data,by=c('Native_new_age'='Boundary_descriptor','Farm'='Farm_name','Year'='Season'))

#drop limberick 1year old 2023/24 and garrickfeild 2year old 2023/24
seedset_meta<-seedset_meta %>% filter(!(Farm=='Limbrick' & Native_new_age=='One year old'))%>% filter(!(Farm=='Garrickfield' & Native_new_age=='Two year old'))

#now do comparison of species richness vs seedset
seedset_meta_clean<-seedset_meta %>% select(unique_sample,Treatment,Mean_seeds_viable)
#colnames(seedset_meta_clean)
#spread wide
seedset_meta_clean<-spread(seedset_meta_clean, key = Treatment, value = Mean_seeds_viable)

##########################
#seed set vs richness raw#
##########################

#generate species counts
#count species per property
sppr <- specnumber(counts_wide)
#head(sppr)

#data frame
sppr_df <- sppr %>% 
  enframe() %>% 
  full_join(meta_data, by = c("name" = "unique_sample"))

#join to seedset counts
sppr_df_ss<-sppr_df %>%  full_join(seedset_meta_clean, by = c("name" = "unique_sample"))

sppr_df_ss$Boundary_descriptor <- factor(sppr_df_ss$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)
colnames(sppr_df_ss)
##  [1] "name"                 "value"                "Boundary_descriptor" 
##  [4] "Season"               "Farm_name"            "Farm type"           
##  [7] "Flower count"         "Site"                 "Date"                
## [10] "Collection_month"     "Day"                  "Beg_Mid_end"         
## [13] "Month_group"          "month_split"          "Boundary_descriptorf"
## [16] "Closed"               "Hand pollinated"      "Open"
#there is 6 samples that I have species counts for but do not have seed set yeilds for, I have asked brad to clarify what is happening there
#just filtering out here so that there is no errors
sppr_df_ss <-sppr_df_ss  %>%
  drop_na(Open)

#open pollination being insect pollination:
plot_sppr <- ggplot(sppr_df_ss, aes(x = value, y = Open)) +
  geom_point(aes(colour = Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
  stat_poly_line() +
  stat_poly_eq() +
   theme_bw()+
  labs(x = "Species richness (count)",
       y = "Seed set",
       title = "Seed set vs species richness", colour = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#model it properly with everything in like it should be:
#seedset is fairly normal
#but note that it is the average not the raw counts (hence lmer)
hist(sppr_df_ss$Open)

#model with flower count season and site (last two random effects)
names(sppr_df_ss)[names(sppr_df_ss) == "Flower count"] <- "Flower_count"

m3<-lmer(Open ~ value + log(Flower_count)+(1|Season) + (1|Site) ,data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + (1 | Season) + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 534.5
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1182 -0.5030  0.1084  0.5423  2.0101 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 3.464    1.861   
##  Season   (Intercept) 3.934    1.983   
##  Residual             5.692    2.386   
## Number of obs: 107, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)         9.74844    4.40476  72.47366   2.213   0.0300 *
## value               0.18001    0.07527 101.19403   2.392   0.0186 *
## log(Flower_count)   0.47170    0.81011  85.30553   0.582   0.5619  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value        0.170       
## lg(Flwr_cn) -0.952 -0.316
m3<-lmer(Open ~ value  +log(Flower_count)+(1|Season) +(1|Boundary_descriptor)+ (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Open ~ value + log(Flower_count) + (1 | Season) + (1 | Boundary_descriptor) +  
##     (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 513.2
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3629 -0.5703 -0.0390  0.5654  2.6245 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Site                (Intercept) 0.3044   0.5517  
##  Boundary_descriptor (Intercept) 4.0084   2.0021  
##  Season              (Intercept) 3.4698   1.8627  
##  Residual                        5.7386   2.3955  
## Number of obs: 107, groups:  Site, 44; Boundary_descriptor, 6; Season, 3
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)         7.12052    4.26032  84.72785   1.671   0.0983 .
## value               0.02207    0.07586 103.01312   0.291   0.7717  
## log(Flower_count)   1.28924    0.77573  98.04269   1.662   0.0997 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value        0.187       
## lg(Flwr_cn) -0.934 -0.351
m3<-lmer(Open ~ value + Boundary_descriptor +log(Flower_count)+Season + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + Boundary_descriptor + log(Flower_count) + Season +  
##     (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 482.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3506 -0.5698 -0.0434  0.5881  2.6459 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.2559   0.5059  
##  Residual             5.7952   2.4073  
## Number of obs: 107, groups:  Site, 44
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            5.079351   3.979542 92.459518   1.276    0.205    
## value                 -0.007205   0.077678 96.864086  -0.093    0.926    
## Boundary_descriptor.L  4.352139   0.697652 78.306676   6.238 2.10e-08 ***
## Boundary_descriptor.Q -1.125204   0.735367 38.600675  -1.530    0.134    
## Boundary_descriptor.C -0.874738   0.787803 96.893550  -1.110    0.270    
## Boundary_descriptor^4  0.789362   0.862959 79.641222   0.915    0.363    
## Boundary_descriptor^5 -0.436259   1.015315 68.525179  -0.430    0.669    
## log(Flower_count)      1.329560   0.788282 93.868434   1.687    0.095 .  
## Season2022/23          3.139662   0.673225 62.627059   4.664 1.68e-05 ***
## Season2023/24          3.369802   0.684577 70.333202   4.922 5.44e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  Bnd_.L Bnd_.Q Bnd_.C Bnd_^4 Bnd_^5 lg(F_) S2022/
## value        0.213                                                        
## Bndry_dsc.L -0.154 -0.528                                                 
## Bndry_dsc.Q -0.130  0.050 -0.069                                          
## Bndry_dsc.C -0.031  0.106 -0.465 -0.005                                   
## Bndry_dsc^4  0.123  0.058 -0.332 -0.378  0.390                            
## Bndry_dsc^5  0.179  0.113 -0.023  0.059 -0.215  0.155                     
## lg(Flwr_cn) -0.981 -0.363  0.260  0.082 -0.034 -0.136 -0.181              
## Sesn2022/23  0.325  0.211 -0.233 -0.028  0.231  0.057 -0.267 -0.413       
## Sesn2023/24  0.115 -0.185 -0.142 -0.174  0.341  0.345 -0.038 -0.151  0.451
m3<-lmer(Open ~ value + Boundary_descriptor +log(Flower_count)+(1|Season) + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + Boundary_descriptor + log(Flower_count) + (1 |  
##     Season) + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 491.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3155 -0.5517 -0.0146  0.6128  2.6518 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.2841   0.533   
##  Season   (Intercept) 3.3315   1.825   
##  Residual             5.7693   2.402   
## Number of obs: 107, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            6.773584   4.206921 81.805763   1.610   0.1112    
## value                 -0.007154   0.077274 98.361999  -0.093   0.9264    
## Boundary_descriptor.L  4.404801   0.697265 78.056749   6.317 1.51e-08 ***
## Boundary_descriptor.Q -1.090963   0.736800 38.257328  -1.481   0.1469    
## Boundary_descriptor.C -0.973968   0.784734 98.010471  -1.241   0.2175    
## Boundary_descriptor^4  0.710538   0.858056 81.274637   0.828   0.4101    
## Boundary_descriptor^5 -0.379745   1.010969 69.690942  -0.376   0.7083    
## log(Flower_count)      1.416444   0.783499 95.313448   1.808   0.0738 .  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  Bnd_.L Bnd_.Q Bnd_.C Bnd_^4 Bnd_^5
## value        0.199                                          
## Bndry_dsc.L -0.160 -0.529                                   
## Bndry_dsc.Q -0.133  0.046 -0.073                            
## Bndry_dsc.C -0.004  0.110 -0.462  0.002                     
## Bndry_dsc^4  0.138  0.066 -0.330 -0.374  0.383              
## Bndry_dsc^5  0.160  0.121 -0.028  0.061 -0.212  0.154       
## lg(Flwr_cn) -0.953 -0.361  0.256  0.082 -0.030 -0.138 -0.190
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.93477, p-value = 0.944
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
#bl_regression(m0)

p <- ggpredict(m3,terms = "value")

ggplot(p, aes(x, x + mean(sppr_df_ss$value), y = predicted)) +
  geom_line(size = 1.1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  labs(
    x = "Species richness",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs species richness") +
  theme_bw()
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

#by season
m3<-lmer(Open ~ value + log(Flower_count)+Season + (1|Boundary_descriptor)+ (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Open ~ value + log(Flower_count) + Season + (1 | Boundary_descriptor) +  
##     (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 503.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3967 -0.5697 -0.0115  0.5794  2.6196 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Site                (Intercept) 0.2789   0.5281  
##  Boundary_descriptor (Intercept) 3.9536   1.9884  
##  Residual                        5.7655   2.4011  
## Number of obs: 107, groups:  Site, 44; Boundary_descriptor, 6
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)         5.37786    4.03073  96.88033   1.334    0.185    
## value               0.02226    0.07626 101.43120   0.292    0.771    
## log(Flower_count)   1.20303    0.78058  96.50654   1.541    0.127    
## Season2022/23       3.23821    0.66260  65.59056   4.887 6.92e-06 ***
## Season2023/24       3.39585    0.67044  73.48013   5.065 2.93e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.202                     
## lg(Flwr_cn) -0.961 -0.353              
## Sesn2022/23  0.331  0.209 -0.424       
## Sesn2023/24  0.108 -0.204 -0.143  0.447
m3<-lmer(Open ~ value + log(Flower_count)+Season + Boundary_descriptor+ (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + Boundary_descriptor +  
##     (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 482.1
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.3506 -0.5698 -0.0434  0.5881  2.6459 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.2559   0.5059  
##  Residual             5.7952   2.4073  
## Number of obs: 107, groups:  Site, 44
## 
## Fixed effects:
##                        Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)            5.079351   3.979542 92.459511   1.276    0.205    
## value                 -0.007205   0.077678 96.864086  -0.093    0.926    
## log(Flower_count)      1.329560   0.788282 93.868428   1.687    0.095 .  
## Season2022/23          3.139662   0.673225 62.627058   4.664 1.68e-05 ***
## Season2023/24          3.369802   0.684577 70.333201   4.922 5.44e-06 ***
## Boundary_descriptor.L  4.352139   0.697652 78.306676   6.238 2.10e-08 ***
## Boundary_descriptor.Q -1.125204   0.735367 38.600675  -1.530    0.134    
## Boundary_descriptor.C -0.874738   0.787803 96.893550  -1.110    0.270    
## Boundary_descriptor^4  0.789362   0.862959 79.641222   0.915    0.363    
## Boundary_descriptor^5 -0.436259   1.015315 68.525178  -0.430    0.669    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/ S2023/ Bnd_.L Bnd_.Q Bnd_.C Bnd_^4
## value        0.213                                                        
## lg(Flwr_cn) -0.981 -0.363                                                 
## Sesn2022/23  0.325  0.211 -0.413                                          
## Sesn2023/24  0.115 -0.185 -0.151  0.451                                   
## Bndry_dsc.L -0.154 -0.528  0.260 -0.233 -0.142                            
## Bndry_dsc.Q -0.130  0.050  0.082 -0.028 -0.174 -0.069                     
## Bndry_dsc.C -0.031  0.106 -0.034  0.231  0.341 -0.465 -0.005              
## Bndry_dsc^4  0.123  0.058 -0.136  0.057  0.345 -0.332 -0.378  0.390       
## Bndry_dsc^5  0.179  0.113 -0.181 -0.267 -0.038 -0.023  0.059 -0.215  0.155
m3<-lmer(Open ~ value + log(Flower_count)+Season + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 524.8
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1463 -0.4893  0.1281  0.5772  2.0114 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 3.432    1.853   
##  Residual             5.709    2.389   
## Number of obs: 107, groups:  Site, 44
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)         7.9956     4.1605 82.1854   1.922   0.0581 .  
## value               0.1820     0.0757 99.6168   2.405   0.0180 *  
## log(Flower_count)   0.3654     0.8163 83.8605   0.448   0.6556    
## Season2022/23       3.7778     0.6390 62.5857   5.912 1.52e-07 ***
## Season2023/24       3.1635     0.6382 64.7787   4.957 5.44e-06 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.188                     
## lg(Flwr_cn) -0.984 -0.321              
## Sesn2022/23  0.424  0.214 -0.498       
## Sesn2023/24  0.085 -0.254 -0.109  0.419
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.97348, p-value = 0.872
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
p <- ggpredict(m3,terms = c("value","Season"))
plot(p)

ggplot(p, aes(x = x, y = predicted,colour = group, fill = group)) +
  geom_line(size = 1.2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high),alpha = 0.25, colour = NA) +
 labs(
    x = "Species richness",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs species richness",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11)
  )

ggplot() +
  geom_point(data = sppr_df_ss,aes(x = value, y = Open),alpha = 0.4, size = 2) +
  geom_line(data = p,aes(x = x, y = predicted, colour = group),size = 1.2) +
  geom_ribbon(data = p,aes(x = x, ymin = conf.low, ymax = conf.high,fill = group),alpha = 0.25, colour = NA) +
  labs(x = "Species richness",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs species richness",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw()

#what about just those that are measured across all seasons so only compare old and bare fences
sppr_df_ss_cut<-sppr_df_ss %>% filter(Boundary_descriptor=='Off-farm bare fence'|Boundary_descriptor=='On-farm bare fence'|Boundary_descriptor=='Old')
m3<-lmer(Open ~ value + log(Flower_count) +Season+ (1|Boundary_descriptor)+ (1|Site),data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: 
## Open ~ value + log(Flower_count) + Season + (1 | Boundary_descriptor) +  
##     (1 | Site)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 412.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1694 -0.5733 -0.0215  0.5665  2.4088 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Site                (Intercept) 0.7734   0.8795  
##  Boundary_descriptor (Intercept) 5.8721   2.4233  
##  Residual                        5.8779   2.4244  
## Number of obs: 87, groups:  Site, 37; Boundary_descriptor, 3
## 
## Fixed effects:
##                   Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)        3.55735    4.85124 66.33394   0.733    0.466    
## value              0.01465    0.09384 80.90002   0.156    0.876    
## log(Flower_count)  1.37737    0.92721 78.21449   1.486    0.141    
## Season2022/23      3.13724    0.70071 53.26863   4.477 4.02e-05 ***
## Season2023/24      3.24236    0.70398 56.48018   4.606 2.38e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.263                     
## lg(Flwr_cn) -0.944 -0.405              
## Sesn2022/23  0.379  0.259 -0.471       
## Sesn2023/24  0.127 -0.199 -0.156  0.436
sppr_df_ss_cut<-sppr_df_ss %>% filter(Boundary_descriptor=='Old')
m3<-lmer(Open ~ value + log(Flower_count) +Season+ (1|Site) ,data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 127.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.76162 -0.49990 -0.06707  0.61526  1.31880 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 1.929    1.389   
##  Residual             1.688    1.299   
## Number of obs: 34, groups:  Site, 14
## 
## Fixed effects:
##                   Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)       -1.11417    4.99536 20.77038  -0.223 0.825682    
## value             -0.13026    0.07591 21.16503  -1.716 0.100771    
## log(Flower_count)  3.01112    1.01960 20.64804   2.953 0.007682 ** 
## Season2022/23      3.72347    0.67889 17.05583   5.485 3.98e-05 ***
## Season2023/24      3.08897    0.65466 16.88339   4.718 0.000202 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.358                     
## lg(Flwr_cn) -0.986 -0.473              
## Sesn2022/23  0.579  0.213 -0.617       
## Sesn2023/24  0.165 -0.265 -0.167  0.455
sppr_df_ss_cut<-sppr_df_ss %>% filter(Boundary_descriptor=='Off-farm bare fence')
m3<-lmer(Open ~ value + log(Flower_count) +Season+ (1|Site) ,data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 120.4
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.14111 -0.51638 -0.01501  0.39440  1.54799 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 3.929    1.982   
##  Residual             5.054    2.248   
## Number of obs: 27, groups:  Site, 12
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)   
## (Intercept)         5.1462     8.2445 18.3959   0.624  0.54016   
## value              -0.2501     0.2597 19.5414  -0.963  0.34742   
## log(Flower_count)   1.1393     1.6831 19.8911   0.677  0.50625   
## Season2022/23       2.7751     1.2562 11.1999   2.209  0.04888 * 
## Season2023/24       3.6467     1.1583 11.4222   3.148  0.00888 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.372                     
## lg(Flwr_cn) -0.977 -0.535              
## Sesn2022/23  0.282  0.513 -0.408       
## Sesn2023/24  0.082 -0.053 -0.128  0.388
sppr_df_ss_cut<-sppr_df_ss %>% filter(Boundary_descriptor=='On-farm bare fence')
m3<-lmer(Open ~ value + log(Flower_count) +Season+ (1|Site) ,data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 119.7
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.2745 -0.4192 -0.1058  0.6664  1.6630 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 0.04616  0.2148  
##  Residual             9.69261  3.1133  
## Number of obs: 26, groups:  Site, 11
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)
## (Intercept)         1.2040    10.5279 19.7550   0.114    0.910
## value               0.2526     0.2648 20.0843   0.954    0.351
## log(Flower_count)   1.4007     2.0980 19.7710   0.668    0.512
## Season2022/23       1.8800     1.6798 15.1292   1.119    0.281
## Season2023/24       2.5693     1.6769 15.7790   1.532    0.145
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.224                     
## lg(Flwr_cn) -0.983 -0.375              
## Sesn2022/23  0.440  0.362 -0.531       
## Sesn2023/24  0.195 -0.068 -0.228  0.436
#or only within a season
sppr_df_ss_cut<-sppr_df_ss %>% filter(Season=='2021/22')
#by season
m3<-lmer(Open ~ value + log(Flower_count) + (1|Boundary_descriptor),data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + (1 | Boundary_descriptor)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 154.3
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -1.52732 -0.74887  0.05349  0.60418  1.91658 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Boundary_descriptor (Intercept) 3.210    1.792   
##  Residual                        3.304    1.818   
## Number of obs: 37, groups:  Boundary_descriptor, 4
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)       10.822790   5.084003 32.921631   2.129   0.0408 *
## value              0.001702   0.123980 32.007814   0.014   0.9891  
## log(Flower_count)  0.072376   0.928515 31.239270   0.078   0.9384  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value       -0.226       
## lg(Flwr_cn) -0.965  0.040
sppr_df_ss_cut<-sppr_df_ss %>% filter(Season=='2022/23')
#by season
m3<-lmer(Open ~ value + log(Flower_count) + (1|Boundary_descriptor),data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + (1 | Boundary_descriptor)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 195.8
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -2.36258 -0.58202  0.09266  0.55173  1.68819 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Boundary_descriptor (Intercept) 6.207    2.491   
##  Residual                        9.768    3.125   
## Number of obs: 38, groups:  Boundary_descriptor, 4
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)
## (Intercept)        -0.1416    10.4174 34.3136  -0.014    0.989
## value               0.1032     0.1750 34.7676   0.589    0.559
## log(Flower_count)   2.5734     1.9744 33.7604   1.303    0.201
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value        0.569       
## lg(Flwr_cn) -0.987 -0.647
sppr_df_ss_cut<-sppr_df_ss %>% filter(Season=='2023/24')
#by season
m3<-lmer(Open ~ value + log(Flower_count) + (1|Boundary_descriptor),data=sppr_df_ss_cut)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + (1 | Boundary_descriptor)
##    Data: sppr_df_ss_cut
## 
## REML criterion at convergence: 141.6
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.4813 -0.5206 -0.0466  0.3076  3.1814 
## 
## Random effects:
##  Groups              Name        Variance Std.Dev.
##  Boundary_descriptor (Intercept) 5.865    2.422   
##  Residual                        4.178    2.044   
## Number of obs: 32, groups:  Boundary_descriptor, 4
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)       16.580069   6.830449 27.796048   2.427   0.0219 *
## value             -0.120687   0.107276 28.615032  -1.125   0.2699  
## log(Flower_count) -0.008866   1.273251 26.629612  -0.007   0.9945  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value        0.082       
## lg(Flwr_cn) -0.969 -0.247
#####################
#seed set vs shannon#
#####################

shannondiv<-diversity(counts_wide)
head(shannondiv)
## Off-farm bare fence Ahuriri 2021/22        One year old Ahuriri 2021/22 
##                           1.0335621                           0.7261928 
## Off-farm bare fence Ahuriri 2022/23        Two year old Ahuriri 2022/23 
##                           1.0986123                           0.0000000 
## Off-farm bare fence Ahuriri 2023/24      Three year old Ahuriri 2023/24 
##                           1.1465958                           2.1730992
#can plot that out
shannondiv_df <- shannondiv %>% 
  enframe() %>% 
  full_join(meta_data, by = c("name" = "unique_sample"))

#join to seedset counts
sppr_df_ss<-shannondiv_df %>%  full_join(seedset_meta_clean, by = c("name" = "unique_sample"))

sppr_df_ss$Boundary_descriptor <- factor(sppr_df_ss$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)
colnames(sppr_df_ss)
##  [1] "name"                 "value"                "Boundary_descriptor" 
##  [4] "Season"               "Farm_name"            "Farm type"           
##  [7] "Flower count"         "Site"                 "Date"                
## [10] "Collection_month"     "Day"                  "Beg_Mid_end"         
## [13] "Month_group"          "month_split"          "Boundary_descriptorf"
## [16] "Closed"               "Hand pollinated"      "Open"
#there is 6 samples that I have species counts for but do not have seed set yeilds for, I have asked brad to clarify what is happening there
#just filtering out here so that there is no errors
sppr_df_ss <-sppr_df_ss  %>%
  drop_na(Open)

#open pollination being insect pollination:
plot_sppr <- ggplot(sppr_df_ss, aes(x = value, y = Open)) +
  geom_point(aes(colour = Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
  stat_poly_line() +
  stat_poly_eq() +
   theme_bw()+
  labs(x = "Shannon",
       y = "Seed set",
       title = "Seed set vs shannon", colour = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#model it properly with everything in like it should be:

#flower count goes in a offset in earlier models but here richness is response so its going in as a variable
#model with flower count season and site (last two random effects)
names(sppr_df_ss)[names(sppr_df_ss) == "Flower count"] <- "Flower_count"
m3<-lmer(Open ~ value + log(Flower_count)+(1|Season) + (1|Site),data=sppr_df_ss)

summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + (1 | Season) + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 531.9
## 
## Scaled residuals: 
##      Min       1Q   Median       3Q      Max 
## -3.11926 -0.49779  0.06816  0.54497  1.85624 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 4.303    2.074   
##  Season   (Intercept) 3.721    1.929   
##  Residual             5.413    2.327   
## Number of obs: 107, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)  
## (Intercept)         9.2671     4.3531 73.6129   2.129   0.0366 *
## value               1.1387     0.6425 92.4912   1.772   0.0796 .
## log(Flower_count)   0.5295     0.8229 86.9368   0.643   0.5216  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value 
## value        0.177       
## lg(Flwr_cn) -0.941 -0.387
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.99292, p-value = 0.912
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
p <- ggpredict(m3,terms = "value")

ggplot(p, aes(x, x + mean(sppr_df_ss$value), y = predicted)) +
  geom_line(size = 1.1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  labs(
    x = "Shannon",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs shannon") +
  theme_bw()

#by season
m3<-lmer(Open ~ value + log(Flower_count)+Season + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ value + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 522.4
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1479 -0.4880  0.1047  0.5365  1.8553 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 4.286    2.070   
##  Residual             5.423    2.329   
## Number of obs: 107, groups:  Site, 44
## 
## Fixed effects:
##                   Estimate Std. Error      df t value Pr(>|t|)    
## (Intercept)         7.5679     4.1257 83.0994   1.834   0.0702 .  
## value               1.1377     0.6498 90.4771   1.751   0.0833 .  
## log(Flower_count)   0.4304     0.8301 85.3394   0.518   0.6055    
## Season2022/23       3.6815     0.6252 65.8567   5.889 1.45e-07 ***
## Season2023/24       3.0360     0.6626 68.5889   4.582 2.01e-05 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) value  lg(F_) S2022/
## value        0.205                     
## lg(Flwr_cn) -0.973 -0.393              
## Sesn2022/23  0.432  0.218 -0.506       
## Sesn2023/24  0.039 -0.407 -0.012  0.355
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.97754, p-value = 0.92
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
p <- ggpredict(m3,terms = c("value","Season"))
plot(p)

ggplot(p, aes(x = x, y = predicted,colour = group, fill = group)) +
  geom_line(size = 1.2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high),alpha = 0.25, colour = NA) +
  labs(x = "Shannon",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs shannon",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11)
  )

ggplot() +
  geom_point(data = sppr_df_ss,aes(x = value, y = Open),alpha = 0.4, size = 2) +
  geom_line(data = p,aes(x = x, y = predicted, colour = group),size = 1.2) +
  geom_ribbon(data = p,aes(x = x, ymin = conf.low, ymax = conf.high,fill = group),alpha = 0.25, colour = NA) +
  labs(x = "Shannon",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs shannon",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw()

#############################
#seed set vs total abundance#
#############################

insect_counts<-rowSums(counts_wide)

insect_counts_df<-data.frame(insect_counts)
insect_counts_df <- rownames_to_column(insect_counts_df, var = "unique_sample")
insect_counts_df<-left_join(insect_counts_df,meta_data)
## Joining with `by = join_by(unique_sample)`
#join to seedset counts
sppr_df_ss<-insect_counts_df %>%  full_join(seedset_meta_clean, by = c("unique_sample" = "unique_sample"))

sppr_df_ss$Boundary_descriptor <- factor(sppr_df_ss$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)
colnames(sppr_df_ss)
##  [1] "unique_sample"        "insect_counts"        "Boundary_descriptor" 
##  [4] "Season"               "Farm_name"            "Farm type"           
##  [7] "Flower count"         "Site"                 "Date"                
## [10] "Collection_month"     "Day"                  "Beg_Mid_end"         
## [13] "Month_group"          "month_split"          "Boundary_descriptorf"
## [16] "Closed"               "Hand pollinated"      "Open"
#there is 6 samples that I have species counts for but do not have seed set yeilds for, I have asked brad to clarify what is happening there
#just filtering out here so that there is no errors
sppr_df_ss <-sppr_df_ss  %>%
  drop_na(Open)

#open pollination being insect pollination:
plot_sppr <- ggplot(sppr_df_ss, aes(x = insect_counts, y = Open)) +
  geom_point(aes(colour = Boundary_descriptor)) +
  scale_colour_manual(values=col_bound)+
  stat_poly_line() +
  stat_poly_eq() +
   theme_bw()+
  labs(x = "Raw counts",
       y = "Seed set",
       title = "Seed set vs raw counts", colour = "Boundary type")+
  theme(axis.text.x = element_text(angle = 90,size=7,hjust=1))

plot_sppr

#model it properly with everything in like it should be:

#flower count goes in a offset in earlier models but here richness is response so its going in as a variable
#model with flower count season and site (last two random effects)
names(sppr_df_ss)[names(sppr_df_ss) == "Flower count"] <- "Flower_count"

m3<-lmer(Open ~ insect_counts + log(Flower_count)+(1|Season) + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ insect_counts + log(Flower_count) + (1 | Season) + (1 |  
##     Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 540
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1696 -0.4715  0.1022  0.5232  1.9179 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 3.673    1.917   
##  Season   (Intercept) 4.598    2.144   
##  Residual             5.610    2.368   
## Number of obs: 107, groups:  Site, 44; Season, 3
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)  
## (Intercept)       1.068e+01  4.521e+00 7.078e+01   2.362   0.0210 *
## insect_counts     1.457e-02  6.653e-03 1.026e+02   2.190   0.0308 *
## log(Flower_count) 4.212e-01  8.220e-01 8.653e+01   0.512   0.6097  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) insct_
## insect_cnts  0.269       
## lg(Flwr_cn) -0.955 -0.363
plot(m3)

qqnorm(resid(m3));qqline(resid(m3))

qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.99107, p-value = 0.808
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
p <- ggpredict(m3,terms = "insect_counts")

ggplot(p, aes(x, x + mean(sppr_df_ss$value), y = predicted)) +
  geom_line(size = 1.1) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high), alpha = 0.2) +
  labs(
    x = "Raw counts",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs raw counts") +
  theme_bw()

#by season
m3<-lmer(Open ~ insect_counts + log(Flower_count)+Season + (1|Site),data=sppr_df_ss)
summary(m3)
## Linear mixed model fit by REML. t-tests use Satterthwaite's method [
## lmerModLmerTest]
## Formula: Open ~ insect_counts + log(Flower_count) + Season + (1 | Site)
##    Data: sppr_df_ss
## 
## REML criterion at convergence: 530
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -3.1932 -0.4666  0.1139  0.5236  1.9247 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  Site     (Intercept) 3.605    1.899   
##  Residual             5.644    2.376   
## Number of obs: 107, groups:  Site, 44
## 
## Fixed effects:
##                    Estimate Std. Error        df t value Pr(>|t|)    
## (Intercept)       8.749e+00  4.237e+00 8.397e+01   2.065   0.0420 *  
## insect_counts     1.520e-02  6.675e-03 1.016e+02   2.278   0.0248 *  
## log(Flower_count) 3.041e-01  8.287e-01 8.513e+01   0.367   0.7146    
## Season2022/23     3.961e+00  6.607e-01 6.831e+01   5.996 8.54e-08 ***
## Season2023/24     3.614e+00  6.153e-01 6.709e+01   5.873 1.47e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) insct_ lg(F_) S2022/
## insect_cnts  0.274                     
## lg(Flwr_cn) -0.989 -0.367              
## Sesn2022/23  0.456  0.341 -0.532       
## Sesn2023/24  0.148  0.050 -0.211  0.487
qqnorm(residuals(m3))
qqline(residuals(m3))

plot(fitted(m3), residuals(m3))

res<-simulateResiduals(m3)
plot(res)

testDispersion(res)

## 
##  DHARMa nonparametric dispersion test via sd of residuals fitted vs.
##  simulated
## 
## data:  simulationOutput
## dispersion = 0.97436, p-value = 0.896
## alternative hypothesis: two.sided
testZeroInflation(res)

## 
##  DHARMa zero-inflation test via comparison to expected zeros with
##  simulation under H0 = fitted model
## 
## data:  simulationOutput
## ratioObsSim = NaN, p-value = 1
## alternative hypothesis: two.sided
p <- ggpredict(m3,terms = c("insect_counts","Season"))
plot(p)

ggplot(p, aes(x = x, y = predicted,colour = group, fill = group)) +
  geom_line(size = 1.2) +
  geom_ribbon(aes(ymin = conf.low, ymax = conf.high),alpha = 0.25, colour = NA) +
  labs(x = "Raw counts",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs raw counts",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw() +
  theme(
    legend.position = "right",
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 11)
  )

ggplot() +
  geom_point(data = sppr_df_ss,aes(x = insect_counts, y = Open),alpha = 0.4, size = 2) +
  geom_line(data = p,aes(x = x, y = predicted, colour = group),size = 1.2) +
  geom_ribbon(data = p,aes(x = x, ymin = conf.low, ymax = conf.high,fill = group),alpha = 0.25, colour = NA) +
  labs(x = "Raw counts",
    y = "Mean seed set per site",
    title = "Marginal prediction: seed set vs raw counts",
    colour = "Season",
    fill = "Season"
  ) +
  theme_bw()

Beta Diversity

Beta diversity analysis Adonis2 works with nesting via a strata factor that blocks permutations to within groups however if you add both site and season you run out of permutations. instead im running this within a season to get around the random effects of season and sites

#beta diversity

###############################
#redoing this within a season:#
###############################
#impact of season overall with interaction of season and boundary
perm<-adonis2(counts_wide ~ Boundary_descriptor *Season+`Flower count`, data = meta_data_bior,strata=meta_data_bior$Site,by='terms')
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide ~ Boundary_descriptor * Season + `Flower count`, data = meta_data_bior, by = "terms", strata = meta_data_bior$Site)
##                             Df SumOfSqs      R2      F Pr(>F)    
## Boundary_descriptor          5   3.0975 0.10938 2.8077  0.002 ** 
## Season                       2   1.5839 0.05593 3.5891  0.001 ***
## `Flower count`               1   0.8155 0.02880 3.6957  0.001 ***
## Boundary_descriptor:Season   4   0.7564 0.02671 0.8570  0.539    
## Residual                   100  22.0649 0.77918                  
## Total                      112  28.3181 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#impact of season overall without interaction of season and boundary
perm<-adonis2(counts_wide ~ Boundary_descriptor +Season+`Flower count`, data = meta_data_bior,strata=meta_data_bior$Site,by='terms')
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Blocks:  strata 
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide ~ Boundary_descriptor + Season + `Flower count`, data = meta_data_bior, by = "terms", strata = meta_data_bior$Site)
##                      Df SumOfSqs      R2      F Pr(>F)    
## Boundary_descriptor   5   3.0975 0.10938 2.8232  0.001 ***
## Season                2   1.5839 0.05593 3.6090  0.001 ***
## `Flower count`        1   0.8155 0.02880 3.7162  0.001 ***
## Residual            104  22.8212 0.80589                  
## Total               112  28.3181 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#season and flower count are significant predictors of species composition

#this for the paper 
#dont include a strata as within a season each site is only measured once and between season issue is dealt with by doing one season at a time
#2021/22
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2021/22')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 One year old            6
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season,by='terms')
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season, by = "terms")
##                     Df SumOfSqs      R2      F Pr(>F)  
## Boundary_descriptor  3   0.6443 0.07642 0.9494  0.518  
## `Flower count`       1   0.5475 0.06494 2.4202  0.026 *
## Residual            32   7.2387 0.85864                
## Total               36   8.4305 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
perm
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season)
##          Df SumOfSqs      R2      F Pr(>F)
## Model     4   1.1918 0.14136 1.3171  0.157
## Residual 32   7.2387 0.85864              
## Total    36   8.4305 1.00000
pairwise_adonis<-pairwise.adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
pairwise_adonis
## $parent_call
## [1] "counts_wide_filt ~ Boundary_descriptor + `Flower count` , strata = Null , permutations 999"
## 
## $`Off-farm bare fence_vs_One year old`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.5163 0.14528 1.0199   0.39
## Residual 12   3.0376 0.85472              
## Total    14   3.5539 1.00000              
## 
## $`Off-farm bare fence_vs_Old`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.5720 0.12158 1.2457  0.276
## Residual 18   4.1325 0.87842              
## Total    20   4.7045 1.00000              
## 
## $`Off-farm bare fence_vs_On-farm bare fence`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.3979 0.09393 0.8294  0.605
## Residual 16   3.8377 0.90607              
## Total    18   4.2356 1.00000              
## 
## $`One year old_vs_Old`
##          Df SumOfSqs      R2     F Pr(>F)  
## Model     2   0.7274 0.18699 1.725  0.091 .
## Residual 15   3.1628 0.81301               
## Total    17   3.8903 1.00000               
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`One year old_vs_On-farm bare fence`
##          Df SumOfSqs     R2      F Pr(>F)
## Model     2   0.6114 0.1694 1.3257  0.225
## Residual 13   2.9980 0.8306              
## Total    15   3.6094 1.0000              
## 
## $`Old_vs_On-farm bare fence`
##          Df SumOfSqs     R2      F Pr(>F)
## Model     2   0.5455 0.1167 1.2552  0.243
## Residual 19   4.1285 0.8833              
## Total    21   4.6740 1.0000              
## 
## attr(,"class")
## [1] "pwadstrata" "list"
#2022/23
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2022/23')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 Two year old            7
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season,by='terms')
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season, by = "terms")
##                     Df SumOfSqs      R2      F Pr(>F)    
## Boundary_descriptor  3   1.7204 0.19275 2.8817  0.001 ***
## `Flower count`       1   0.6381 0.07149 3.2063  0.007 ** 
## Residual            33   6.5672 0.73576                  
## Total               37   8.9257 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
perm
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season)
##          Df SumOfSqs      R2      F Pr(>F)    
## Model     4   2.3585 0.26424 2.9628  0.001 ***
## Residual 33   6.5672 0.73576                  
## Total    37   8.9257 1.00000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise_adonis<-pairwise.adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
pairwise_adonis
## $parent_call
## [1] "counts_wide_filt ~ Boundary_descriptor + `Flower count` , strata = Null , permutations 999"
## 
## $`Off-farm bare fence_vs_Two year old`
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     2   0.8664 0.21276 1.7567  0.011 *
## Residual 13   3.2055 0.78724                
## Total    15   4.0719 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Off-farm bare fence_vs_Old`
##          Df SumOfSqs     R2      F Pr(>F)    
## Model     2   1.3040 0.2833 3.5576  0.001 ***
## Residual 18   3.2989 0.7167                  
## Total    20   4.6029 1.0000                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Off-farm bare fence_vs_On-farm bare fence`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.6868 0.15638 1.4829  0.125
## Residual 16   3.7052 0.84362              
## Total    18   4.3920 1.00000              
## 
## $`Two year old_vs_Old`
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     2  0.78383 0.24887 2.6506  0.004 **
## Residual 16  2.36570 0.75113                 
## Total    18  3.14952 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Two year old_vs_On-farm bare fence`
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     2   1.1375 0.27249 2.6219  0.003 **
## Residual 14   3.0368 0.72751                 
## Total    16   4.1743 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Old_vs_On-farm bare fence`
##          Df SumOfSqs      R2     F Pr(>F)    
## Model     2   1.3699 0.30018 4.075  0.001 ***
## Residual 19   3.1936 0.69982                 
## Total    21   4.5635 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"
#2023/24
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2023/24')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence    11
## 2 On-farm bare fence      7
## 3 Three year old          7
## 4 Old                    13
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season,by='terms')
perm
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season, by = "terms")
##                     Df SumOfSqs      R2      F Pr(>F)  
## Boundary_descriptor  3   1.1584 0.12924 1.7378  0.014 *
## `Flower count`       1   0.4727 0.05273 2.1273  0.023 *
## Residual            33   7.3326 0.81803                
## Total               37   8.9637 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
perm<-adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
perm
## Permutation test for adonis under reduced model
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = counts_wide_filt ~ Boundary_descriptor + `Flower count`, data = meta_data_bior_season)
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     4   1.6311 0.18197 1.8352  0.005 **
## Residual 33   7.3326 0.81803                 
## Total    37   8.9637 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
pairwise_adonis<-pairwise.adonis2(counts_wide_filt ~ Boundary_descriptor +`Flower count`, data = meta_data_bior_season)
pairwise_adonis
## $parent_call
## [1] "counts_wide_filt ~ Boundary_descriptor + `Flower count` , strata = Null , permutations 999"
## 
## $`Off-farm bare fence_vs_Three year old`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.5968 0.14372 1.2588  0.237
## Residual 15   3.5556 0.85628              
## Total    17   4.1523 1.00000              
## 
## $`Off-farm bare fence_vs_On-farm bare fence`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.5809 0.13387 1.1592  0.269
## Residual 15   3.7585 0.86613              
## Total    17   4.3394 1.00000              
## 
## $`Off-farm bare fence_vs_Old`
##          Df SumOfSqs      R2      F Pr(>F)   
## Model     2   1.1078 0.19315 2.5135  0.003 **
## Residual 21   4.6276 0.80685                 
## Total    23   5.7353 1.00000                 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## $`Three year old_vs_On-farm bare fence`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2  0.56908 0.18632 1.2594  0.216
## Residual 11  2.48525 0.81368              
## Total    13  3.05433 1.00000              
## 
## $`Three year old_vs_Old`
##          Df SumOfSqs      R2      F Pr(>F)
## Model     2   0.5406 0.13969 1.3802  0.172
## Residual 17   3.3290 0.86031              
## Total    19   3.8696 1.00000              
## 
## $`On-farm bare fence_vs_Old`
##          Df SumOfSqs      R2      F Pr(>F)  
## Model     2   0.7832 0.17524 1.8061  0.031 *
## Residual 17   3.6858 0.82476                
## Total    19   4.4690 1.00000                
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## attr(,"class")
## [1] "pwadstrata" "list"

NMDS

NMDS plot

#NMDS is struggling to find a repeated solution. I think this is due to the rare species
#https://stats.stackexchange.com/questions/649643/best-solution-was-not-repeated-no-reliable-result-for-metamds

pk_NMDS <- metaMDS(counts_wide,trymax=200,distance='bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2652953 
## Run 1 stress 0.2653193 
## ... Procrustes: rmse 0.002870692  max resid 0.02332905 
## Run 2 stress 0.265368 
## ... Procrustes: rmse 0.004002067  max resid 0.02835626 
## Run 3 stress 0.290287 
## Run 4 stress 0.2653198 
## ... Procrustes: rmse 0.002855275  max resid 0.02252978 
## Run 5 stress 0.2653191 
## ... Procrustes: rmse 0.002722492  max resid 0.02147588 
## Run 6 stress 0.2653675 
## ... Procrustes: rmse 0.004020604  max resid 0.02863624 
## Run 7 stress 0.2653208 
## ... Procrustes: rmse 0.002642413  max resid 0.02330662 
## Run 8 stress 0.2653934 
## ... Procrustes: rmse 0.004700631  max resid 0.02863612 
## Run 9 stress 0.2653207 
## ... Procrustes: rmse 0.002583372  max resid 0.02283482 
## Run 10 stress 0.2652947 
## ... New best solution
## ... Procrustes: rmse 0.001257342  max resid 0.00895897 
## ... Similar to previous best
## Run 11 stress 0.2652944 
## ... New best solution
## ... Procrustes: rmse 7.467386e-05  max resid 0.0006476448 
## ... Similar to previous best
## Run 12 stress 0.2653675 
## ... Procrustes: rmse 0.003804391  max resid 0.0283176 
## Run 13 stress 0.2837519 
## Run 14 stress 0.2814547 
## Run 15 stress 0.2944526 
## Run 16 stress 0.2652941 
## ... New best solution
## ... Procrustes: rmse 0.0003010253  max resid 0.001962343 
## ... Similar to previous best
## Run 17 stress 0.2653207 
## ... Procrustes: rmse 0.002836651  max resid 0.02295002 
## Run 18 stress 0.2653208 
## ... Procrustes: rmse 0.002986822  max resid 0.02437684 
## Run 19 stress 0.2653209 
## ... Procrustes: rmse 0.002989487  max resid 0.02438927 
## Run 20 stress 0.2653208 
## ... Procrustes: rmse 0.002984678  max resid 0.02416793 
## *** Best solution repeated 1 times
pk_NMDS
## 
## Call:
## metaMDS(comm = counts_wide, distance = "bray", trymax = 200) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(counts_wide)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.2652941 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 20 tries
## The best solution was from try 16 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(counts_wide))'
#stress is a little high (ideally <0.2)

stressplot(pk_NMDS)

plot(pk_NMDS)

plot_df <- scores(pk_NMDS, display = "sites") %>% 
  as.data.frame() %>% 
  rownames_to_column("site") %>% 
  full_join(meta_data, by = c("site" = "unique_sample"))

plot_df$Boundary_descriptor <- factor(plot_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)


plot_nmds <- ggplot(plot_df, aes(x = NMDS1, y = NMDS2, color = Boundary_descriptor)) +
  geom_point(size = 2, alpha = 0.8) +
  stat_ellipse(linetype = 2, linewidth = 0.5) +
  theme_bw()+
    scale_colour_manual(values=col_bound)+
  labs(title = "NMDS",
       colour = "Boundary type")
plot_nmds

#split by season
#2021/22
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2021/22')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 One year old            6
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
pk_NMDS <- metaMDS(counts_wide_filt,trymax=200,distance='bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2416754 
## Run 1 stress 0.266341 
## Run 2 stress 0.2415845 
## ... New best solution
## ... Procrustes: rmse 0.007641302  max resid 0.0346257 
## Run 3 stress 0.2450924 
## Run 4 stress 0.2431584 
## Run 5 stress 0.2607282 
## Run 6 stress 0.2424638 
## Run 7 stress 0.2516884 
## Run 8 stress 0.2602867 
## Run 9 stress 0.2518254 
## Run 10 stress 0.2481453 
## Run 11 stress 0.2589563 
## Run 12 stress 0.2547208 
## Run 13 stress 0.241964 
## ... Procrustes: rmse 0.02431377  max resid 0.07729592 
## Run 14 stress 0.2502586 
## Run 15 stress 0.2418345 
## ... Procrustes: rmse 0.02147062  max resid 0.0742688 
## Run 16 stress 0.241964 
## ... Procrustes: rmse 0.02427348  max resid 0.07734234 
## Run 17 stress 0.2573669 
## Run 18 stress 0.26041 
## Run 19 stress 0.2419749 
## ... Procrustes: rmse 0.02437051  max resid 0.07765798 
## Run 20 stress 0.2485534 
## Run 21 stress 0.2624156 
## Run 22 stress 0.2463353 
## Run 23 stress 0.2591142 
## Run 24 stress 0.248141 
## Run 25 stress 0.2491103 
## Run 26 stress 0.2423285 
## Run 27 stress 0.2416754 
## ... Procrustes: rmse 0.007638298  max resid 0.03464011 
## Run 28 stress 0.245013 
## Run 29 stress 0.2491105 
## Run 30 stress 0.2465976 
## Run 31 stress 0.2422662 
## Run 32 stress 0.2506363 
## Run 33 stress 0.2634569 
## Run 34 stress 0.2513892 
## Run 35 stress 0.2423443 
## Run 36 stress 0.2448412 
## Run 37 stress 0.2562846 
## Run 38 stress 0.2430131 
## Run 39 stress 0.2464613 
## Run 40 stress 0.2597 
## Run 41 stress 0.2430129 
## Run 42 stress 0.2415844 
## ... New best solution
## ... Procrustes: rmse 9.876787e-05  max resid 0.0004464989 
## ... Similar to previous best
## *** Best solution repeated 1 times
pk_NMDS
## 
## Call:
## metaMDS(comm = counts_wide_filt, distance = "bray", trymax = 200) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(counts_wide_filt)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.2415844 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 42 tries
## The best solution was from try 42 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(counts_wide_filt))'
#stress is a little high (ideally <0.2)

stressplot(pk_NMDS)

plot_df <- scores(pk_NMDS, display = "sites") %>% 
  as.data.frame() %>% 
  rownames_to_column("site") %>% 
  full_join(meta_data_bior_season, by = c("site" = "unique_sample"))

plot_df$Boundary_descriptor <- factor(plot_df$Boundary_descriptor,levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Old"),ordered = T)

col_bound_cut<-c('chocolate','#FDE725FF',"#5DC863FF","#440154FF")

plot_nmds <- ggplot(plot_df, aes(x = NMDS1, y = NMDS2, color = Boundary_descriptor)) +
  geom_point(size = 2, alpha = 0.8) +
  stat_ellipse(linetype = 2, linewidth = 0.5) +
  theme_bw()+
    scale_colour_manual(values=col_bound_cut)+
  labs(title = "NMDS: 2021/22",
       colour = "Boundary type")
plot_nmds

#split by season
#2022/23
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2022/23')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 Two year old            7
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
pk_NMDS <- metaMDS(counts_wide_filt,trymax=200,distance='bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2200399 
## Run 1 stress 0.2221007 
## Run 2 stress 0.2229304 
## Run 3 stress 0.2200798 
## ... Procrustes: rmse 0.1342793  max resid 0.4190282 
## Run 4 stress 0.2230558 
## Run 5 stress 0.2279775 
## Run 6 stress 0.2372655 
## Run 7 stress 0.2185904 
## ... New best solution
## ... Procrustes: rmse 0.08287396  max resid 0.3326822 
## Run 8 stress 0.2188757 
## ... Procrustes: rmse 0.08044504  max resid 0.2420238 
## Run 9 stress 0.2201244 
## Run 10 stress 0.2274941 
## Run 11 stress 0.2413929 
## Run 12 stress 0.2376507 
## Run 13 stress 0.2205165 
## Run 14 stress 0.2400903 
## Run 15 stress 0.2166273 
## ... New best solution
## ... Procrustes: rmse 0.1103448  max resid 0.3778856 
## Run 16 stress 0.2297139 
## Run 17 stress 0.2188788 
## Run 18 stress 0.2403943 
## Run 19 stress 0.2291591 
## Run 20 stress 0.2385902 
## Run 21 stress 0.2281452 
## Run 22 stress 0.2199956 
## Run 23 stress 0.2230558 
## Run 24 stress 0.2283487 
## Run 25 stress 0.2339426 
## Run 26 stress 0.2198988 
## Run 27 stress 0.2218166 
## Run 28 stress 0.2198641 
## Run 29 stress 0.2230559 
## Run 30 stress 0.2198642 
## Run 31 stress 0.2225794 
## Run 32 stress 0.2305199 
## Run 33 stress 0.2239021 
## Run 34 stress 0.2357781 
## Run 35 stress 0.2306503 
## Run 36 stress 0.2504132 
## Run 37 stress 0.2357698 
## Run 38 stress 0.2227828 
## Run 39 stress 0.2188757 
## Run 40 stress 0.2319827 
## Run 41 stress 0.221491 
## Run 42 stress 0.2259456 
## Run 43 stress 0.2195135 
## Run 44 stress 0.2188757 
## Run 45 stress 0.2423184 
## Run 46 stress 0.2265404 
## Run 47 stress 0.2167362 
## ... Procrustes: rmse 0.008793761  max resid 0.04363824 
## Run 48 stress 0.218263 
## Run 49 stress 0.2235143 
## Run 50 stress 0.2222452 
## Run 51 stress 0.2304745 
## Run 52 stress 0.2196899 
## Run 53 stress 0.2254325 
## Run 54 stress 0.2316126 
## Run 55 stress 0.2233843 
## Run 56 stress 0.2187671 
## Run 57 stress 0.2198977 
## Run 58 stress 0.22004 
## Run 59 stress 0.2349971 
## Run 60 stress 0.2349325 
## Run 61 stress 0.2187671 
## Run 62 stress 0.225068 
## Run 63 stress 0.2198642 
## Run 64 stress 0.2211172 
## Run 65 stress 0.2306928 
## Run 66 stress 0.2258709 
## Run 67 stress 0.2256673 
## Run 68 stress 0.2213179 
## Run 69 stress 0.2224119 
## Run 70 stress 0.2210284 
## Run 71 stress 0.2389982 
## Run 72 stress 0.2210284 
## Run 73 stress 0.2201245 
## Run 74 stress 0.2682761 
## Run 75 stress 0.2241925 
## Run 76 stress 0.2336731 
## Run 77 stress 0.2234829 
## Run 78 stress 0.2279803 
## Run 79 stress 0.2254332 
## Run 80 stress 0.2205164 
## Run 81 stress 0.2390322 
## Run 82 stress 0.2242414 
## Run 83 stress 0.2281563 
## Run 84 stress 0.2391827 
## Run 85 stress 0.2201244 
## Run 86 stress 0.2198249 
## Run 87 stress 0.2272844 
## Run 88 stress 0.2229955 
## Run 89 stress 0.2235143 
## Run 90 stress 0.2243414 
## Run 91 stress 0.2213179 
## Run 92 stress 0.2271985 
## Run 93 stress 0.2252956 
## Run 94 stress 0.2244726 
## Run 95 stress 0.2214987 
## Run 96 stress 0.2235142 
## Run 97 stress 0.2229957 
## Run 98 stress 0.2506218 
## Run 99 stress 0.2281447 
## Run 100 stress 0.2201244 
## Run 101 stress 0.2228869 
## Run 102 stress 0.2198249 
## Run 103 stress 0.2283104 
## Run 104 stress 0.2235142 
## Run 105 stress 0.2343498 
## Run 106 stress 0.2191616 
## Run 107 stress 0.2218271 
## Run 108 stress 0.2367727 
## Run 109 stress 0.2223432 
## Run 110 stress 0.2320262 
## Run 111 stress 0.2332069 
## Run 112 stress 0.2343498 
## Run 113 stress 0.2324834 
## Run 114 stress 0.2302157 
## Run 115 stress 0.2492445 
## Run 116 stress 0.2201303 
## Run 117 stress 0.221491 
## Run 118 stress 0.2229303 
## Run 119 stress 0.2244616 
## Run 120 stress 0.2307148 
## Run 121 stress 0.2555844 
## Run 122 stress 0.2487685 
## Run 123 stress 0.2220486 
## Run 124 stress 0.2337981 
## Run 125 stress 0.2195134 
## Run 126 stress 0.221318 
## Run 127 stress 0.2424425 
## Run 128 stress 0.2254245 
## Run 129 stress 0.2274946 
## Run 130 stress 0.2389194 
## Run 131 stress 0.2297144 
## Run 132 stress 0.2401712 
## Run 133 stress 0.2369539 
## Run 134 stress 0.2304847 
## Run 135 stress 0.2339548 
## Run 136 stress 0.2244725 
## Run 137 stress 0.2250803 
## Run 138 stress 0.2343501 
## Run 139 stress 0.2270015 
## Run 140 stress 0.2200399 
## Run 141 stress 0.2327005 
## Run 142 stress 0.2309103 
## Run 143 stress 0.2315138 
## Run 144 stress 0.2283447 
## Run 145 stress 0.227494 
## Run 146 stress 0.2198977 
## Run 147 stress 0.2188784 
## Run 148 stress 0.2198979 
## Run 149 stress 0.2270776 
## Run 150 stress 0.2297148 
## Run 151 stress 0.2166269 
## ... New best solution
## ... Procrustes: rmse 0.0005486429  max resid 0.002698159 
## ... Similar to previous best
## *** Best solution repeated 1 times
pk_NMDS
## 
## Call:
## metaMDS(comm = counts_wide_filt, distance = "bray", trymax = 200) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(counts_wide_filt)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.2166269 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 151 tries
## The best solution was from try 151 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(counts_wide_filt))'
#stress is a little high (ideally <0.2)

stressplot(pk_NMDS)

plot_df <- scores(pk_NMDS, display = "sites") %>% 
  as.data.frame() %>% 
  rownames_to_column("site") %>% 
  full_join(meta_data_bior_season, by = c("site" = "unique_sample"))

plot_df$Boundary_descriptor <- factor(plot_df$Boundary_descriptor,levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"),ordered = T)

col_bound_cut<-c('chocolate','#FDE725FF',"#21908CFF","#440154FF")

plot_nmds <- ggplot(plot_df, aes(x = NMDS1, y = NMDS2, color = Boundary_descriptor)) +
  geom_point(size = 2, alpha = 0.8) +
  stat_ellipse(linetype = 2, linewidth = 0.5) +
  theme_bw()+
    scale_colour_manual(values=col_bound_cut)+
  labs(title = "NMDS: 2022/23",
       colour = "Boundary type")
plot_nmds

#split by season
#2023/24
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2023/24')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence    11
## 2 On-farm bare fence      7
## 3 Three year old          7
## 4 Old                    13
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
pk_NMDS <- metaMDS(counts_wide_filt,trymax=200,distance='bray')
## Square root transformation
## Wisconsin double standardization
## Run 0 stress 0.2597324 
## Run 1 stress 0.2659622 
## Run 2 stress 0.2771353 
## Run 3 stress 0.2779188 
## Run 4 stress 0.2585376 
## ... New best solution
## ... Procrustes: rmse 0.04868828  max resid 0.1671177 
## Run 5 stress 0.3081875 
## Run 6 stress 0.2669785 
## Run 7 stress 0.263064 
## Run 8 stress 0.2772616 
## Run 9 stress 0.2593985 
## Run 10 stress 0.2590248 
## ... Procrustes: rmse 0.01410659  max resid 0.06468358 
## Run 11 stress 0.2638926 
## Run 12 stress 0.2648893 
## Run 13 stress 0.2742171 
## Run 14 stress 0.2894945 
## Run 15 stress 0.2643817 
## Run 16 stress 0.2610237 
## Run 17 stress 0.2590136 
## ... Procrustes: rmse 0.02382729  max resid 0.08634661 
## Run 18 stress 0.2672734 
## Run 19 stress 0.2811008 
## Run 20 stress 0.2857846 
## Run 21 stress 0.2620848 
## Run 22 stress 0.2682274 
## Run 23 stress 0.2676588 
## Run 24 stress 0.2917803 
## Run 25 stress 0.2596362 
## Run 26 stress 0.2862996 
## Run 27 stress 0.2669057 
## Run 28 stress 0.2871231 
## Run 29 stress 0.2686267 
## Run 30 stress 0.2596951 
## Run 31 stress 0.2769328 
## Run 32 stress 0.2770305 
## Run 33 stress 0.2593985 
## Run 34 stress 0.2672735 
## Run 35 stress 0.2683262 
## Run 36 stress 0.2857482 
## Run 37 stress 0.268739 
## Run 38 stress 0.2697331 
## Run 39 stress 0.26849 
## Run 40 stress 0.2620969 
## Run 41 stress 0.2746289 
## Run 42 stress 0.2718595 
## Run 43 stress 0.2657519 
## Run 44 stress 0.2669196 
## Run 45 stress 0.2587534 
## ... Procrustes: rmse 0.01924406  max resid 0.07467784 
## Run 46 stress 0.2874862 
## Run 47 stress 0.2682823 
## Run 48 stress 0.2685927 
## Run 49 stress 0.279802 
## Run 50 stress 0.2713753 
## Run 51 stress 0.2586903 
## ... Procrustes: rmse 0.01801287  max resid 0.0716313 
## Run 52 stress 0.2725134 
## Run 53 stress 0.2649727 
## Run 54 stress 0.2646923 
## Run 55 stress 0.39761 
## Run 56 stress 0.2979573 
## Run 57 stress 0.2590249 
## ... Procrustes: rmse 0.0141132  max resid 0.06459096 
## Run 58 stress 0.262916 
## Run 59 stress 0.2778678 
## Run 60 stress 0.2753146 
## Run 61 stress 0.2751971 
## Run 62 stress 0.2758703 
## Run 63 stress 0.2781459 
## Run 64 stress 0.2654015 
## Run 65 stress 0.2774688 
## Run 66 stress 0.2796911 
## Run 67 stress 0.2701735 
## Run 68 stress 0.272861 
## Run 69 stress 0.2672735 
## Run 70 stress 0.2682273 
## Run 71 stress 0.2621322 
## Run 72 stress 0.2781028 
## Run 73 stress 0.277774 
## Run 74 stress 0.2590249 
## ... Procrustes: rmse 0.01410286  max resid 0.06471438 
## Run 75 stress 0.2635039 
## Run 76 stress 0.2682825 
## Run 77 stress 0.2653971 
## Run 78 stress 0.2789233 
## Run 79 stress 0.2680581 
## Run 80 stress 0.2809016 
## Run 81 stress 0.2847075 
## Run 82 stress 0.2705313 
## Run 83 stress 0.2685438 
## Run 84 stress 0.2647062 
## Run 85 stress 0.2629608 
## Run 86 stress 0.2667733 
## Run 87 stress 0.25953 
## Run 88 stress 0.3046741 
## Run 89 stress 0.2604837 
## Run 90 stress 0.2788133 
## Run 91 stress 0.2954518 
## Run 92 stress 0.3058663 
## Run 93 stress 0.2723491 
## Run 94 stress 0.2663432 
## Run 95 stress 0.2697884 
## Run 96 stress 0.2590248 
## ... Procrustes: rmse 0.01411299  max resid 0.06461469 
## Run 97 stress 0.2732458 
## Run 98 stress 0.2610496 
## Run 99 stress 0.2653971 
## Run 100 stress 0.2894684 
## Run 101 stress 0.2586022 
## ... Procrustes: rmse 0.01035525  max resid 0.04416587 
## Run 102 stress 0.2686511 
## Run 103 stress 0.2597324 
## Run 104 stress 0.2688112 
## Run 105 stress 0.2700218 
## Run 106 stress 0.2924035 
## Run 107 stress 0.2848592 
## Run 108 stress 0.2666081 
## Run 109 stress 0.3128712 
## Run 110 stress 0.2638919 
## Run 111 stress 0.2746611 
## Run 112 stress 0.2808698 
## Run 113 stress 0.2641246 
## Run 114 stress 0.2781527 
## Run 115 stress 0.2638919 
## Run 116 stress 0.308866 
## Run 117 stress 0.2906729 
## Run 118 stress 0.2680576 
## Run 119 stress 0.2683928 
## Run 120 stress 0.2922945 
## Run 121 stress 0.2730831 
## Run 122 stress 0.2621033 
## Run 123 stress 0.2910062 
## Run 124 stress 0.2647062 
## Run 125 stress 0.2802028 
## Run 126 stress 0.2629167 
## Run 127 stress 0.2622646 
## Run 128 stress 0.2703711 
## Run 129 stress 0.2809228 
## Run 130 stress 0.25953 
## Run 131 stress 0.2597324 
## Run 132 stress 0.2720263 
## Run 133 stress 0.2590277 
## ... Procrustes: rmse 0.0177516  max resid 0.08039445 
## Run 134 stress 0.2585376 
## ... Procrustes: rmse 3.162757e-05  max resid 0.0001085911 
## ... Similar to previous best
## *** Best solution repeated 1 times
pk_NMDS
## 
## Call:
## metaMDS(comm = counts_wide_filt, distance = "bray", trymax = 200) 
## 
## global Multidimensional Scaling using monoMDS
## 
## Data:     wisconsin(sqrt(counts_wide_filt)) 
## Distance: bray 
## 
## Dimensions: 2 
## Stress:     0.2585376 
## Stress type 1, weak ties
## Best solution was repeated 1 time in 134 tries
## The best solution was from try 4 (random start)
## Scaling: centring, PC rotation, halfchange scaling 
## Species: expanded scores based on 'wisconsin(sqrt(counts_wide_filt))'
#stress is a little high (ideally <0.2)

stressplot(pk_NMDS)

plot_df <- scores(pk_NMDS, display = "sites") %>% 
  as.data.frame() %>% 
  rownames_to_column("site") %>% 
  full_join(meta_data_bior_season, by = c("site" = "unique_sample"))

plot_df$Boundary_descriptor <- factor(plot_df$Boundary_descriptor,levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"),ordered = T)

col_bound_cut<-c('chocolate','#FDE725FF',"#3B528BFF","#440154FF")

plot_nmds <- ggplot(plot_df, aes(x = NMDS1, y = NMDS2, color = Boundary_descriptor)) +
  geom_point(size = 2, alpha = 0.8) +
  stat_ellipse(linetype = 2, linewidth = 0.5) +
  theme_bw()+
    scale_colour_manual(values=col_bound_cut)+
  labs(title = "NMDS: 2023/24",
       colour = "Boundary type")
plot_nmds

RDA

An alternative to the MDS that would align well with the adonis2 beta diversity analysis would be a rda (constrained PCA - PCA on predicted variables) and using the model. For the species lines note that I double the length here for plotting

counts_wide.Hellinger <- disttransform(counts_wide, method='hellinger')
#add conditions here for random effects:
#Ordination.model2 <- rda(counts_wide.Hellinger ~ Boundary_descriptorf +  offset(log(`Flower count`)) + Condition(Season)+Condition(Farm_name)+Condition(month_split), data=meta_data_bior, scaling="species")
meta_data_bior$log_flower <- scale(log1p(meta_data_bior$`Flower count`))
#Ordination.model2 <- rda(counts_wide.Hellinger ~ Boundary_descriptorf +  log_flower + Condition(Season), data=meta_data_bior, scaling="species")

Ordination.model2 <- rda(counts_wide.Hellinger ~ Boundary_descriptorf + `Flower count` + Season, data=meta_data_bior, scaling="species")
#Ordination.model2 <- rda(counts_wide.Hellinger ~ Boundary_descriptorf + `Flower count` + Condition(Season), data=meta_data_bior, scaling="species")

#Ordination.model2 <- rda(counts_wide.Hellinger ~ Boundary_descriptorf +  offset(log(`Flower count`)) + Condition(Season)+Condition(month_split), data=meta_data_bior, scaling="species")

plot2 <- ordiplot(Ordination.model2, choices=c(1,2))

sites.long2 <- sites.long(plot2, env.data=meta_data_bior)
species.long2 <- species.long(plot2)
axis.long2 <- axis.long(Ordination.model2, choices=c(1, 2))
sites.long2$Boundary_descriptor <- factor(sites.long2$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

#just dots
plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
    xlab(axis.long2[1, "label"]) +
    ylab(axis.long2[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    geom_point(data=sites.long2, 
               aes(x=axis1, y=axis2, colour=Boundary_descriptor), 
               size=3) +
   scale_colour_manual(values=col_bound)+
    theme_bw()

plotgg2

#95% circles
plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
    xlab(axis.long2[1, "label"]) +
    ylab(axis.long2[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    geom_point(data=sites.long2, 
               aes(x=axis1, y=axis2, colour=Boundary_descriptor), 
               size=3) +
  stat_ellipse(data=sites.long2, 
                      aes(x=axis1, y=axis2, colour=Boundary_descriptor, fill=Boundary_descriptor),
type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F) +
    scale_colour_manual(values=col_bound)+
     scale_fill_manual(values=col_bound)+
   theme_bw()

plotgg2

#add in species arrows
spec.envfit <- envfit(plot2, env=counts_wide.Hellinger)
spec.data.envfit <- data.frame(r=spec.envfit$vectors$r, p=spec.envfit$vectors$pvals)
species.long2 <- species.long(plot2, spec.data=spec.data.envfit)
#species that explain 10% of variation
species.long3 <- species.long2[species.long2$r >= 0.1, ]
#species.long3
ordiArrowMul(spec.envfit) #close to 2
## [1] 1.090675
plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
    xlab(axis.long2[1, "label"]) +
    ylab(axis.long2[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    geom_point(data=sites.long2, 
               aes(x=axis1, y=axis2, colour=Boundary_descriptor), 
               size=3) +
   scale_colour_manual(values=col_bound)+
    geom_segment(data=species.long3, 
                 aes(x=0, y=0, xend=axis1*2, yend=axis2*4), 
                 colour="black", size=0.7, arrow=arrow()) +
    geom_text_repel(data=species.long3, 
                    aes(x=axis1*2, y=axis2*2, label=labels),
                    colour="black") +
    theme_bw()

plotgg2

#species and circles
plotgg2 <- ggplot() + 
    geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
    geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
    xlab(axis.long2[1, "label"]) +
    ylab(axis.long2[2, "label"]) +  
    scale_x_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +
    scale_y_continuous(sec.axis = dup_axis(labels=NULL, name=NULL)) +    
    geom_point(data=sites.long2, 
               aes(x=axis1, y=axis2, colour=Boundary_descriptor), 
               size=3) +
  stat_ellipse(data=sites.long2, 
                      aes(x=axis1, y=axis2, colour=Boundary_descriptor, fill=Boundary_descriptor),
type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F) +
    scale_colour_manual(values=col_bound)+
     scale_fill_manual(values=col_bound)+
    geom_segment(data=species.long3, 
                 aes(x=0, y=0, xend=axis1*2, yend=axis2*2), 
                 colour="black", size=0.7, arrow=arrow()) +
    geom_text_repel(data=species.long3, 
                    aes(x=axis1*2, y=axis2*2, label=labels),
                    colour="black") +
    theme_bw()

plotgg2

#alternative would be ddRDA or capscale on bray distance (PCoA) which might be better for us for differences between communities
#cap <- capscale(counts_wide ~ Boundary_descriptorf + `Flower count` + Condition(Season), data = meta_data_bior, distance = "bray")
cap <- capscale(counts_wide ~ Boundary_descriptorf + `Flower count` + Season, data = meta_data_bior, distance = "bray")

#I cant get sites in as a condition it bounds it up too much but this issue isnt really a problem for within a season 
#cap <- capscale(counts_wide ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(month_split), data = meta_data_bior, distance = "bray")
#cap <- capscale(counts_wide ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(Farm_name)+Condition(month_split), data = meta_data_bior, distance = "bray")
#cap <- capscale(counts_wide.Hellinger ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(Farm_name)+Condition(month_split), data = meta_data_bior, distance = "bray")

anova(cap, by = "terms")
## Permutation test for capscale under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: capscale(formula = counts_wide ~ Boundary_descriptorf + `Flower count` + Season, data = meta_data_bior, distance = "bray")
##                       Df SumOfSqs      F Pr(>F)    
## Boundary_descriptorf   5   3.3142 2.4456  0.001 ***
## `Flower count`         1   0.6272 2.3142  0.008 ** 
## Season                 2   1.9109 3.5253  0.001 ***
## Residual             104  28.1872                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
plot(cap)

site_scores <- scores(cap, display = "sites")
species_scores <- scores(cap, display = "species")
centroids <- scores(cap, display = "cn")


df_sites <- as.data.frame(site_scores)
df_sites$unique_sample <- rownames(df_sites)
capscale_df <- left_join(df_sites,meta_data)
## Joining with `by = join_by(unique_sample)`
capscale_df$Boundary_descriptor <- factor(capscale_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

eig_vals <- cap$CCA$eig
prop_var <- eig_vals / sum(eig_vals)

xlab <- paste0("CAP1 (", round(prop_var[1] * 100, 1), "%)")
ylab <- paste0("CAP2 (", round(prop_var[2] * 100, 1), "%)")

#base plot
ggplot(capscale_df, aes(x = CAP1, y = CAP2, color = Boundary_descriptor)) +
  geom_point(size = 3) +
 scale_colour_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  theme_bw()

#95% circles  
ggplot() +
  geom_point(data=capscale_df, aes(x = CAP1, y = CAP2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  scale_fill_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  stat_ellipse(data=capscale_df, 
  aes(x = CAP1, y = CAP2, color = Boundary_descriptor,fill = Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#note species scores are calculated using euclidean distance in capscale which we assume correlates to brays
#so probably dump the species on in supplementary and say combined with standard RDA shows driven by lassio
# Calculate arrow length (distance from origin) to equate to species explaining most variation
#top species
sp_len <- sqrt(rowSums(species_scores^2))

# Rank species by length
head(sort(sp_len, decreasing = TRUE), 10)   # top 10 species for plot
##       Lasioglossum bee    NZ orange hover fly              Drone fly 
##              2.1920570              1.2939902              0.8397514 
##      Striped flesh fly              Honey bee         Pollenia flies 
##              0.4621239              0.3188515              0.3064117 
##          Other insects      Ichneumonid wasps    Seedcorn maggot fly 
##              0.2799742              0.2622711              0.2510284 
## Buff-tailed bumble bee 
##              0.2193852
keep <- names(sp_len[sp_len > quantile(sp_len, 0.9)])  # top 10% longest arrows
sp_filt <- species_scores[keep, ]

#species in plot      
ggplot() +
  geom_point(data=capscale_df, aes(x = CAP1, y = CAP2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=CAP1*2, yend=CAP2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=CAP1*2, y=CAP2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#circles      
ggplot() +
  geom_point(data=capscale_df, aes(x = CAP1, y = CAP2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  scale_fill_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=CAP1*2, yend=CAP2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=CAP1*2, y=CAP2*2, label=rownames(sp_filt)),
    colour="black") +
  stat_ellipse(data=capscale_df, 
  aes(x = CAP1, y = CAP2, color = Boundary_descriptor,fill = Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#I think ddRDA is probably slightly better than capscale from what I've read but they do a similar thing 

#bound_dbrda <- dbrda(counts_wide ~ Boundary_descriptorf + `Flower count` + Condition(Season), data = meta_data_bior, distance = "bray")
#bound_dbrda <- dbrda(counts_wide ~ Boundary_descriptorf + `Flower count` +Season + Condition(Site), data = meta_data_bior, distance = "bray")
bound_dbrda <- dbrda(counts_wide ~ Boundary_descriptorf + `Flower count` +Season , data = meta_data_bior, distance = "bray")
#bound_dbrda <- dbrda(counts_wide ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(month_split), data = meta_data_bior, distance = "bray")

#bound_dbrda <- dbrda(counts_wide ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(Farm_name)+Condition(month_split), data = meta_data_bior, distance = "bray")
#bound_dbrda <- dbrda(counts_wide.Hellinger ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(Season)+Condition(Farm_name)+Condition(month_split), data = meta_data_bior, distance = "bray")

bound_dbrda_summary<-summary(bound_dbrda)
plot(bound_dbrda)

anova(bound_dbrda, by = "terms")
## Permutation test for dbrda under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## Model: dbrda(formula = counts_wide ~ Boundary_descriptorf + `Flower count` + Season, data = meta_data_bior, distance = "bray")
##                       Df SumOfSqs      F Pr(>F)    
## Boundary_descriptorf   5   3.0975 2.8232  0.001 ***
## `Flower count`         1   0.5721 2.6073  0.005 ** 
## Season                 2   1.8272 4.1634  0.001 ***
## Residual             104  22.8212                  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
site_scores <- scores(bound_dbrda, display = "sites")
centroids <- scores(bound_dbrda, display = "cn")


df_sites <- as.data.frame(site_scores)
df_sites$unique_sample <- rownames(df_sites)
bound_dbrda_df <- left_join(df_sites,meta_data)
## Joining with `by = join_by(unique_sample)`
bound_dbrda_df$Boundary_descriptor <- factor(bound_dbrda_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence','One year old','Two year old','Three year old','Old'),ordered = T)

eig_vals <- bound_dbrda$CCA$eig
prop_var <- eig_vals / sum(eig_vals)

xlab <- paste0("dbRDA1 (", round(prop_var[1] * 100, 1), "%)")
ylab <- paste0("dbRDA2 (", round(prop_var[2] * 100, 1), "%)")

ggplot(bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor)) +
  geom_point(size = 3) +
 scale_colour_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  theme_bw()

#with circles
#95% circles  
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  scale_fill_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#add in species arrows
#rdRDA doesnt add species scores in automatically (due to them being calculated from the eucludian distance)
#using hellinger transformed here
sppscores(bound_dbrda)<-counts_wide.Hellinger

# Calculate arrow length (distance from origin) to equate to species explaining most variation
#top species
species_scores <- scores(bound_dbrda, display = "species")

sp_len <- sqrt(rowSums(species_scores^2))

# Rank species by length
head(sort(sp_len, decreasing = TRUE), 10)   # top 10 species for plot
##       Lasioglossum.bee    NZ.orange.hover.fly              Drone.fly 
##              1.7907364              1.0501226              0.8524561 
##      Ichneumonid.wasps         Pollenia.flies      Striped.flesh.fly 
##              0.8186768              0.6620704              0.4985030 
##   Bronze.cluster.flies    Seedcorn.maggot.fly           Hylaeus.bees 
##              0.4710526              0.4529427              0.4061124 
## Buff.tailed.bumble.bee 
##              0.3990084
keep <- names(sp_len[sp_len > quantile(sp_len, 0.9)])  # top 10% longest arrows
sp_filt <- species_scores[keep, ]

#species in plot      
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#add in species arrows & eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound)+
  scale_fill_manual(values=col_bound)+
  labs(x = xlab, y = ylab, color='Boundary type') +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#lets have a look within a season:
#2021/22
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2021/22')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 One year old            6
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"One year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf +`Flower count` , data = meta_data_bior_season, distance = "bray")
#bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(month_split), data = meta_data_bior_season, distance = "bray")



bound_dbrda_summary<-summary(bound_dbrda)
site_scores <- scores(bound_dbrda, display = "sites")
centroids <- scores(bound_dbrda, display = "cn")


df_sites <- as.data.frame(site_scores)
df_sites$unique_sample <- rownames(df_sites)
bound_dbrda_df <- left_join(df_sites,meta_data)
## Joining with `by = join_by(unique_sample)`
bound_dbrda_df$Boundary_descriptor
##  [1] Off-farm bare fence One year old        Old                
##  [4] On-farm bare fence  Old                 On-farm bare fence 
##  [7] Old                 Off-farm bare fence One year old       
## [10] One year old        On-farm bare fence  Old                
## [13] Off-farm bare fence One year old        Old                
## [16] On-farm bare fence  Off-farm bare fence Old                
## [19] On-farm bare fence  Off-farm bare fence Old                
## [22] On-farm bare fence  Off-farm bare fence Old                
## [25] On-farm bare fence  Off-farm bare fence Old                
## [28] On-farm bare fence  One year old        Old                
## [31] On-farm bare fence  Off-farm bare fence Old                
## [34] On-farm bare fence  Off-farm bare fence One year old       
## [37] Old                
## 6 Levels: Off-farm bare fence On-farm bare fence One year old ... Old
bound_dbrda_df$Boundary_descriptor <- factor(bound_dbrda_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence',"One year old","Old"),ordered = T)
col_bound_cut<-c('chocolate','#FDE725FF',"#5DC863FF","#440154FF")

eig_vals <- bound_dbrda$CCA$eig
prop_var <- eig_vals / sum(eig_vals)

xlab <- paste0("dbRDA1 (", round(prop_var[1] * 100, 1), "%)")
ylab <- paste0("dbRDA2 (", round(prop_var[2] * 100, 1), "%)")

#add in species arrows
#rdRDA doesnt add species scores in automatically (due to them being calculated from the eucludian distance)
#using hellinger transformed here
counts_wide.Hellinger_filt <- disttransform(counts_wide_filt, method='hellinger')

sppscores(bound_dbrda)<-counts_wide.Hellinger_filt

# Calculate arrow length (distance from origin) to equate to species explaining most variation
#top species
species_scores <- scores(bound_dbrda, display = "species")

sp_len <- sqrt(rowSums(species_scores^2))

# Rank species by length
head(sort(sp_len, decreasing = TRUE), 10)   # top 10 species for plot
##     NZ.orange.hover.fly        Lasioglossum.bee       Striped.flesh.fly 
##               0.9515702               0.8796693               0.3922125 
##               Drone.fly    Bronze.cluster.flies      NZ.black.hover.fly 
##               0.3458669               0.2131655               0.1723712 
##               Honey.bee          Pollenia.flies     Seedcorn.maggot.fly 
##               0.1468986               0.1233284               0.1226692 
## Eleven.spotted.ladybird 
##               0.1170217
keep <- names(sp_len[sp_len > quantile(sp_len, 0.9)])  # top 10% longest arrows
sp_filt <- species_scores[keep, ]

#eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2021/22") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#species in plot      
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2021/22") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#add in species arrows & eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2021/22") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#lets have a look within a season:
#2022/23
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2022/23')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence     9
## 2 On-farm bare fence     10
## 3 Two year old            7
## 4 Old                    12
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf + `Flower count` , data = meta_data_bior_season, distance = "bray")
#bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(month_split), data = meta_data_bior_season, distance = "bray")



bound_dbrda_summary<-summary(bound_dbrda)
site_scores <- scores(bound_dbrda, display = "sites")
centroids <- scores(bound_dbrda, display = "cn")


df_sites <- as.data.frame(site_scores)
df_sites$unique_sample <- rownames(df_sites)
bound_dbrda_df <- left_join(df_sites,meta_data)
## Joining with `by = join_by(unique_sample)`
bound_dbrda_df$Boundary_descriptor
##  [1] Off-farm bare fence Two year old        Two year old       
##  [4] Old                 On-farm bare fence  Old                
##  [7] On-farm bare fence  Old                 Off-farm bare fence
## [10] Two year old        Two year old        On-farm bare fence 
## [13] Old                 Off-farm bare fence Two year old       
## [16] Old                 On-farm bare fence  Off-farm bare fence
## [19] Old                 On-farm bare fence  Off-farm bare fence
## [22] Old                 On-farm bare fence  Off-farm bare fence
## [25] Old                 On-farm bare fence  Off-farm bare fence
## [28] Old                 On-farm bare fence  Two year old       
## [31] Old                 On-farm bare fence  Off-farm bare fence
## [34] Old                 On-farm bare fence  Off-farm bare fence
## [37] Two year old        Old                
## 6 Levels: Off-farm bare fence On-farm bare fence One year old ... Old
bound_dbrda_df$Boundary_descriptor <- factor(bound_dbrda_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence',"Two year old","Old"),ordered = T)
col_bound_cut<-c('chocolate','#FDE725FF',"#21908CFF","#440154FF")

eig_vals <- bound_dbrda$CCA$eig
prop_var <- eig_vals / sum(eig_vals)

xlab <- paste0("dbRDA1 (", round(prop_var[1] * 100, 1), "%)")
ylab <- paste0("dbRDA2 (", round(prop_var[2] * 100, 1), "%)")

#add in species arrows
#rdRDA doesnt add species scores in automatically (due to them being calculated from the eucludian distance)
#using hellinger transformed here
counts_wide.Hellinger_filt <- disttransform(counts_wide_filt, method='hellinger')

sppscores(bound_dbrda)<-counts_wide.Hellinger_filt

# Calculate arrow length (distance from origin) to equate to species explaining most variation
#top species
species_scores <- scores(bound_dbrda, display = "species")

sp_len <- sqrt(rowSums(species_scores^2))

# Rank species by length
head(sort(sp_len, decreasing = TRUE), 10)   # top 10 species for plot
##         Lasioglossum.bee                Drone.fly                Honey.bee 
##                1.6594107                0.6016680                0.5046045 
##      NZ.orange.hover.fly       NZ.black.hover.fly             Hylaeus.bees 
##                0.4707921                0.2820263                0.2307984 
##      Seedcorn.maggot.fly   Buff.tailed.bumble.bee Yellow.admiral.butterfly 
##                0.2283000                0.2259717                0.2188864 
##     Bronze.cluster.flies 
##                0.2157137
keep <- names(sp_len[sp_len > quantile(sp_len, 0.9)])  # top 10% longest arrows
sp_filt <- species_scores[keep, ]

#eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2022/23") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#species in plot      
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2022/23") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#add in species arrows & eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2022/23") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#lets have a look within a season:
#2023/24
meta_data_bior_season<-meta_data_bior %>% filter(Season=='2023/24')
meta_data_bior_season %>% select(Boundary_descriptor) %>% group_by(Boundary_descriptor) %>% summarise(count = n())
## # A tibble: 4 × 2
##   Boundary_descriptor count
##   <fct>               <int>
## 1 Off-farm bare fence    11
## 2 On-farm bare fence      7
## 3 Three year old          7
## 4 Old                    13
meta_data_bior_season$Boundary_descriptorf<-factor(meta_data_bior_season$Boundary_descriptor, levels = c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"))
counts_wide_filt <- counts_wide[rownames(counts_wide) %in% meta_data_bior_season$unique_sample, ]
counts_wide_filt <- counts_wide_filt[,colSums(counts_wide_filt) > 0]
bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf +`Flower count` , data = meta_data_bior_season, distance = "bray")
#bound_dbrda <- dbrda(counts_wide_filt ~ Boundary_descriptorf + offset(log(`Flower count`)) + Condition(month_split), data = meta_data_bior_season, distance = "bray")

bound_dbrda_summary<-summary(bound_dbrda)
site_scores <- scores(bound_dbrda, display = "sites")
centroids <- scores(bound_dbrda, display = "cn")


df_sites <- as.data.frame(site_scores)
df_sites$unique_sample <- rownames(df_sites)
bound_dbrda_df <- left_join(df_sites,meta_data)
## Joining with `by = join_by(unique_sample)`
bound_dbrda_df$Boundary_descriptor
##  [1] Off-farm bare fence Three year old      Off-farm bare fence
##  [4] Three year old      On-farm bare fence  Old                
##  [7] On-farm bare fence  Old                 Off-farm bare fence
## [10] On-farm bare fence  Old                 Off-farm bare fence
## [13] Old                 Off-farm bare fence Three year old     
## [16] Three year old      On-farm bare fence  Old                
## [19] Off-farm bare fence Three year old      Old                
## [22] Off-farm bare fence Old                 Old                
## [25] Off-farm bare fence Old                 Off-farm bare fence
## [28] On-farm bare fence  Off-farm bare fence Old                
## [31] On-farm bare fence  Three year old      Old                
## [34] Old                 On-farm bare fence  Off-farm bare fence
## [37] Three year old      Old                
## 6 Levels: Off-farm bare fence On-farm bare fence One year old ... Old
bound_dbrda_df$Boundary_descriptor <- factor(bound_dbrda_df$Boundary_descriptor,levels=c('Off-farm bare fence','On-farm bare fence',"Three year old","Old"),ordered = T)
col_bound_cut<-c('chocolate','#FDE725FF',"#3B528BFF","#440154FF")

eig_vals <- bound_dbrda$CCA$eig
prop_var <- eig_vals / sum(eig_vals)

xlab <- paste0("dbRDA1 (", round(prop_var[1] * 100, 1), "%)")
ylab <- paste0("dbRDA2 (", round(prop_var[2] * 100, 1), "%)")

#add in species arrows
#rdRDA doesnt add species scores in automatically (due to them being calculated from the eucludian distance)
#using hellinger transformed here
counts_wide.Hellinger_filt <- disttransform(counts_wide_filt, method='hellinger')

sppscores(bound_dbrda)<-counts_wide.Hellinger_filt

# Calculate arrow length (distance from origin) to equate to species explaining most variation
#top species
species_scores <- scores(bound_dbrda, display = "species")

sp_len <- sqrt(rowSums(species_scores^2))

# Rank species by length
head(sort(sp_len, decreasing = TRUE), 10)   # top 10 species for plot
##       Lasioglossum.bee         Pollenia.flies    NZ.orange.hover.fly 
##              0.9126707              0.6230509              0.4546874 
##     NZ.black.hover.fly           Hylaeus.bees      Ichneumonid.wasps 
##              0.3764921              0.3648282              0.3413372 
##          Other.insects   Bronze.cluster.flies      Striped.flesh.fly 
##              0.3403431              0.3152175              0.3089185 
## European.blue.blow.fly 
##              0.2554038
keep <- names(sp_len[sp_len > quantile(sp_len, 0.9)])  # top 10% longest arrows
sp_filt <- species_scores[keep, ]

#eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2023/24") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  theme_bw()

#species in plot      
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2023/24") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

#add in species arrows & eclipses
ggplot() +
  geom_point(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor),size = 3) +
  scale_colour_manual(values=col_bound_cut)+
  scale_fill_manual(values=col_bound_cut)+
  labs(x = xlab, y = ylab, color='Boundary type',title = "2023/24") +
  geom_vline(xintercept = c(0), color = "grey70", linetype = 2) +
  geom_hline(yintercept = c(0), color = "grey70", linetype = 2) +  
  geom_segment(data=sp_filt, 
     aes(x=0, y=0, xend=dbRDA1*2, yend=dbRDA2*2), 
     colour="black", size=0.7, arrow=arrow()) +
  stat_ellipse(data=bound_dbrda_df, aes(x = dbRDA1, y = dbRDA2, color = Boundary_descriptor,fill=Boundary_descriptor), 
  type='t',level=0.95,geom='polygon',alpha=0.1,show.legend=F)+
  geom_text_repel(data=sp_filt, 
    aes(x=dbRDA1*2, y=dbRDA2*2, label=rownames(sp_filt)),
    colour="black") +
  theme_bw()

iNEXT

iNEXT (iNterpolation and EXTrapolation) is an R package, available in CRAN and Github, for rarefaction and extrapolation of species diversity (Hill numbers). It’s a useful package to use if you want to make any predictions about sampling size or diversity.

Hill numbers

Hill numbers are a set of diversity indices used in ecology and biodiversity assessments to quantify the richness and evenness of species in a given community. These indices were developed by Michael Hill and are widely used to summarize various aspects of biodiversity. Hill numbers are particularly useful because they provide a way to focus on different facets of diversity, depending on the specific questions or goals of a study. For a thorough introduction to Hill numbers check out: Alberdi & Gilbert

There are several Hill numbers, but they all share a common formula that relies on an exponent, denoted as “q.” The choice of the exponent determines which aspect of diversity is emphasized:

  • Richness (q = 0): Hill number with q = 0 is also known as the “species richness” or “taxonomic distinctness” index. It simply counts the number of unique species in a community without considering their abundances. This index is valuable when you want to emphasize the presence or absence of species but not their relative abundance.

  • Exponential Shannon entropy (q = 1): This index corresponds to the Shannon-Wiener diversity index (commonly referred to as Shannon entropy) when q is set to 1. It considers both the richness and the evenness of species abundances. It is often used when you want to give equal importance to species richness and evenness.

  • Inverse Simpson index (q = 2): The Inverse Simpson index corresponds to the Simpson diversity index when q is set to 2. This index emphasizes the dominance of the most abundant species in a community. It is often used when you want to give more weight to the presence of dominant species.

  • Higher Hill numbers (q > 2): These Hill numbers give increasing weight to the most abundant species, placing more emphasis on the dominant species in the community. The larger the value of q, the more sensitive the index becomes to the presence of dominant species.

Hill numbers offer a flexible framework for assessing biodiversity because they allow you to choose the appropriate value of q based on the specific ecological question or management objective you are addressing. For example:

  • If you want to measure the overall diversity of a community while considering both species richness and evenness, you might use q = 1.

  • If you are concerned about the impact of a few highly dominant species in an ecosystem, you might use q = 2 or a higher value to focus on those dominant species.

#iNEXT package iNEXT is a neat package that can calculate Hill numbers, enabling us to investigate alpha diversity accumulation curves (similar to species accumulation curves). These graphs can help us determine if sufficient sampling was conducted at each site, based on the plateauing of the curves, similar to the rarefaction curves. But iNEXT.3D goes beyond rarefraction and extrapolates alpha diversity numbers beyond the number of samples collected (default is double sample number). Its written by Anne Chao and you can find her papers here: https://sites.google.com/view/chao-lab-website/home

Running iNEXT

#innext uses a weird list of lists, this is a annoying format to get into
#im using raw incident data as the input (thus it is made binary at one point in the code)

#generate a list of the unique sample locations
categories <- unique(data$Boundary_descriptor)
categories
## [1] "Off-farm bare fence" "One year old"        "Two year old"       
## [4] "Three year old"      "Old"                 "On-farm bare fence"
#create empty list
split_list <- list()
#fill empty list with a presence absence OTU from each location

data<-data %>% mutate(unique_ob=paste(Boundary_descriptor,Farm_name,Season)) 

for (category in categories) {
  subset_data <- data %>% filter(Boundary_descriptor == category)
  test<-subset_data %>% ungroup() %>% dplyr::select(unique_ob,`New Inseck key name`,`Sum of Insects/100 flowers`)
  test   <- test %>%  spread(key = unique_ob, value = `Sum of Insects/100 flowers`)
  test<-test %>% remove_rownames %>% column_to_rownames(var="New Inseck key name")
  test<-test %>% mutate(across(everything(), ~ ifelse(. > 0, 1, 0)))
  split_list[[category]]<-as(test,'matrix')
} 

matrix_list <- list(data = list())
for (category in categories) {
  otu_table <- as(split_list[[category]], "matrix")
  matrix_list[["data"]][[category]] <- otu_table
}

#here Im setting the endpoint as NULL which is double the input sample size (this is probably most robust)
out.raw <- iNEXT3D(data = matrix_list$data, diversity = 'TD', q = c(0, 1, 2), datatype = 'incidence_raw', nboot = 50,endpoint=NULL)

#there is various ways to look at these plots:
#within a sample
ggiNEXT3D(out.raw, type = 1, facet.var = 'Assemblage') + facet_wrap(~Assemblage, nrow = 3)

#across samples
ggiNEXT3D(out.raw, type = 1, facet.var = "Order.q")

#sample completeness
ggiNEXT3D(out.raw, type = 2, facet.var = "Order.q", color.var = "Assemblage")

#using innext diversity estimates in a model

Alternative plots for iNEXT

Here i have set the endpoint as 50 - I doubt this is robust for the 1,2,3 year plots but makes the plots even. ‘q1’ in particular looks squiffy at 50.

out.raw <- iNEXT3D(data = matrix_list$data, diversity = 'TD', q = c(0, 1, 2), datatype = 'incidence_raw', nboot = 50,endpoint=50)

#there is various ways to look at these plots:
#within a sample
ggiNEXT3D(out.raw, type = 1, facet.var = 'Assemblage') + facet_wrap(~Assemblage, nrow = 3)

#across samples
ggiNEXT3D(out.raw, type = 1, facet.var = "Order.q")

#sample completeness
ggiNEXT3D(out.raw, type = 2, facet.var = "Order.q", color.var = "Assemblage")